mirror of
https://github.com/Kaelio/ktx.git
synced 2026-06-28 08:49:38 +02:00
Compare commits
82 commits
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
ca231df5fe | ||
|
|
5645dc4d28 | ||
|
|
967a413a06 | ||
|
|
89f25435d5 | ||
|
|
73e4c8b270 | ||
|
|
77c38e9ea2 | ||
|
|
f61ea76007 | ||
|
|
a155c0b844 | ||
|
|
2830cb5ac7 | ||
|
|
c815e10fb3 | ||
|
|
9f715f93f1 | ||
|
|
144943ec1d | ||
|
|
e550091a76 | ||
|
|
1f16a89c94 | ||
|
|
d62dc46a86 | ||
|
|
fb50c11d16 | ||
|
|
4dae8c34dd | ||
|
|
01ccc73e40 | ||
|
|
e8bfb3d301 | ||
|
|
fde9f9862d | ||
|
|
6c815ef529 | ||
|
|
b81391cd9f | ||
|
|
7e29543398 | ||
|
|
8a50601582 | ||
|
|
9587049283 | ||
|
|
e4e7b40c23 | ||
|
|
e817736b91 | ||
|
|
674b58b3ed | ||
|
|
4e61020089 | ||
|
|
ed44f46f2a | ||
|
|
cf2871ec8b | ||
|
|
feb0818444 | ||
|
|
663eaff940 | ||
|
|
e1067bf734 | ||
|
|
0689d709d2 | ||
|
|
a278d2f7d0 | ||
|
|
00cdf2de90 | ||
|
|
005c5fc860 | ||
|
|
7c3b4cea2c | ||
|
|
28953eb616 | ||
|
|
56e06334d2 | ||
|
|
058051f1b9 | ||
|
|
2877b85adc | ||
|
|
f3f893bf01 | ||
|
|
853f39a7c3 | ||
|
|
036a745fc1 | ||
|
|
b076431b0a | ||
|
|
fd18caa26a | ||
|
|
65de75ebd7 | ||
|
|
0425160857 | ||
|
|
9ff0e86bb8 | ||
|
|
07ab275662 | ||
|
|
7b0023471e | ||
|
|
470802e58e | ||
|
|
66517fc320 | ||
|
|
6b2f7c3365 | ||
|
|
bd3a375081 | ||
|
|
e5425b51a3 | ||
|
|
8050b59f6e | ||
|
|
7ece0b63d3 | ||
|
|
07bbdefa14 | ||
|
|
50dec7bf64 | ||
|
|
48676c74fa | ||
|
|
795a97485a | ||
|
|
5232578d44 | ||
|
|
2c18a62de4 | ||
|
|
2896f9fb91 | ||
|
|
18245c2373 | ||
|
|
0d0ea55184 | ||
|
|
dc39eb7ef9 | ||
|
|
2914407f09 | ||
|
|
bf1fe9748e | ||
|
|
698efdcef8 | ||
|
|
377f21acd7 | ||
|
|
d3e20df1d5 | ||
|
|
d14227468b | ||
|
|
fb7b94b60e | ||
|
|
c3d8cedb0b | ||
|
|
5a8821073b | ||
|
|
ec7edf8f50 | ||
|
|
c2beaf7d55 | ||
|
|
8eb1cd3e79 |
510 changed files with 24068 additions and 5576 deletions
95
AGENTS.md
95
AGENTS.md
|
|
@ -64,6 +64,25 @@ When rules conflict, follow this order:
|
|||
4. Code quality: types, readable boundaries, focused modules
|
||||
5. Performance where it matters
|
||||
|
||||
## Opinionated Product Defaults
|
||||
|
||||
- **MUST**: Prefer one canonical behavior over configurable alternatives. A new
|
||||
flag, config field, environment variable, mode, strategy option, adapter hook,
|
||||
or fallback path is a product feature and must be justified by an explicit
|
||||
user request or a real correctness requirement.
|
||||
- **MUST NOT**: Add speculative flexibility for imagined users, migrations,
|
||||
review preferences, local workflows, or "just in case" scenarios. If the
|
||||
requested behavior can work with one solid default, implement that default.
|
||||
- **MUST NOT**: Add boolean switches that create two runtime paths unless both
|
||||
paths are essential and the user explicitly asked for the choice. Boolean
|
||||
policy knobs are especially suspect because they double the state space and
|
||||
test surface.
|
||||
- **MUST**: When a design seems to need a new option, first try to remove the
|
||||
need by choosing the stronger default, tightening the invariant, or failing
|
||||
clearly. Ask the user before adding the option if it still seems necessary.
|
||||
- **MUST**: Delete obsolete branches, tests, docs, and config when removing a
|
||||
behavior. Do not preserve dormant compatibility paths.
|
||||
|
||||
## Repository Shape
|
||||
|
||||
**ktx** is a pnpm + uv workspace.
|
||||
|
|
@ -192,6 +211,19 @@ autonomously — without being asked the leading question — is the bar.
|
|||
next stack. The only acceptable static patterns are genuinely universal
|
||||
invariants (e.g. DB-engine system catalogs) and ktx's own self-emitted
|
||||
signatures.
|
||||
- **MUST**: Give each capability one implementation and route every caller
|
||||
through it. When some behavior — running a query, resolving a credential or
|
||||
config reference, authenticating, selecting a dialect, loading config —
|
||||
already has a working implementation that some call sites use, make new or
|
||||
divergent call sites depend on that path instead of standing up a second one.
|
||||
Parallel implementations of one capability drift apart silently: a fix, a
|
||||
newly supported input, or an added case lands on one path and not the other,
|
||||
so one entry point (a CLI command, an MCP tool, an ingest stage) succeeds
|
||||
while another fails on the same input. When two paths already do the same
|
||||
job, collapse onto the shared one and delete the duplicate instead of
|
||||
keeping both. When fixing a defect that lives on one path, fix the shared
|
||||
implementation; do not patch the symptom on a forked branch, which preserves
|
||||
the divergence you set out to remove.
|
||||
- **SHOULD**: Before inventing an abstraction or hand-rolling structural logic,
|
||||
search for what already exists and reuse it — the codebase's canonical
|
||||
representation (a structured ref/key type) instead of a parallel string scheme,
|
||||
|
|
@ -212,12 +244,54 @@ Before presenting a design, answer these explicitly:
|
|||
instead of building or parsing my own?
|
||||
5. Am I discarding the better option on a weak or misapplied constraint
|
||||
(one-time vs recurring cost, "more surface area", "more work now")?
|
||||
6. Does another entry point already perform this operation through a shared
|
||||
implementation? If so, am I routing through that path instead of forking a
|
||||
parallel one — and if I'm fixing a bug, am I fixing the shared layer rather
|
||||
than one branch?
|
||||
7. Am I adding a user-visible option or alternate runtime path that the user did
|
||||
not ask for? If yes, can one opinionated default solve the problem instead?
|
||||
8. Does this option multiply behavior by caller path, config value, or local
|
||||
state? If yes, remove it unless it is explicitly required.
|
||||
|
||||
A user question that nudges toward an alternative ("would X help?", "should I
|
||||
always do Y?", "will you hardcode Z?") is a signal that a better option exists.
|
||||
Investigate the implied direction and reason it through *before* defending the
|
||||
original proposal — and prefer to have asked yourself the question first.
|
||||
|
||||
Example: If generated context changes should be saved, choose one save policy
|
||||
and route ingest, setup, memory, indexing, and docs through it. Do not add an
|
||||
`auto_commit`-style switch unless the user explicitly asks for staged-only runs
|
||||
and accepts the extra runtime path.
|
||||
|
||||
## Code Comments and Docstrings
|
||||
|
||||
Code must be self-explanatory. Clear names, types, and signatures do the
|
||||
documenting; a comment or docstring exists only to state what the code cannot
|
||||
show. Everything else belongs in the PR description or nowhere.
|
||||
|
||||
- **MUST**: Keep each comment to 1-3 lines stating only what the code cannot
|
||||
show: a cross-file invariant ("error-severity issues never reach here — the
|
||||
doctor exits on them first"), a required ordering ("ktx.yaml is written
|
||||
before git init, so a crash cannot leave a bare `.git`"), or a library quirk
|
||||
("zod reports unknown record keys as `invalid_key`").
|
||||
- **MUST**: Hold docstrings (Python `"""..."""`, JSDoc/TSDoc) to the same bar.
|
||||
A docstring states a function's purpose or contract in 1-3 lines; when a real
|
||||
quirk or invariant motivates the code, note it once and briefly. Let
|
||||
self-explanatory code carry the rest — a well-named, well-typed function
|
||||
often needs no docstring at all.
|
||||
- **MUST**: State each invariant once, at the public entry point. Do not repeat
|
||||
the same guarantee across a module docstring, a helper, its wrapper, and the
|
||||
call site.
|
||||
- **MUST NOT**: Write multi-paragraph docstrings or prose comment blocks —
|
||||
design rationale, alternatives considered, change narration ("is now written
|
||||
before…"), caller enumerations ("shared by X, Y, and Z"), worked examples
|
||||
that restate the code, or the same explanation repeated in a module docstring
|
||||
and the function it describes. That is the author addressing the reviewer; it
|
||||
belongs in the PR description and rots once merged.
|
||||
- **MAY**: Open a regression test with a 1-3 line comment stating the scenario
|
||||
it guards when the test name cannot carry it. Omit design history and
|
||||
references to removed designs.
|
||||
|
||||
## TypeScript Standards
|
||||
|
||||
- Use Node 22+ and pnpm workspace commands.
|
||||
|
|
@ -337,7 +411,8 @@ use `PascalCase` without the suffix.
|
|||
|
||||
## Telemetry
|
||||
|
||||
**ktx** ships PostHog usage telemetry. When adding commands or events:
|
||||
**ktx** ships PostHog usage telemetry. Catalog telemetry events use strict
|
||||
schemas. When adding commands or events:
|
||||
|
||||
- **MUST NOT**: Add fields that carry user data — file paths, hostnames,
|
||||
environment values, SQL text, schema/table/column names, error messages,
|
||||
|
|
@ -354,6 +429,24 @@ use `PascalCase` without the suffix.
|
|||
of collected data changes. Adding another event with no new field types
|
||||
needs no docs change.
|
||||
|
||||
### Error reports
|
||||
|
||||
**ktx** also sends PostHog Error Tracking `$exception` events when telemetry is
|
||||
enabled. This channel is separate from the strict catalog event schema and is
|
||||
used only for exception diagnostics.
|
||||
|
||||
`$exception` events may include stack frames, error class names, raw error
|
||||
messages, cause chains, `source`, `handled`, `fatal`, runtime version fields,
|
||||
OS/runtime fields, and the hashed `projectId` when known. Stack frames may
|
||||
include local file paths and the local username when those appear in paths.
|
||||
|
||||
`$exception` events must never intentionally include secrets, credentials,
|
||||
database URLs, auth headers, raw argv, raw environment values, SQL text,
|
||||
schema/table/column names as explicit properties, customer row data, user prompt
|
||||
text, or raw MCP arguments. Reporters must redact call-site-provided secret
|
||||
snapshots and common static credential patterns before the SDK serializes the
|
||||
exception.
|
||||
|
||||
## Documentation and Specs
|
||||
|
||||
- Keep public documentation in `README.md`, package READMEs, example READMEs,
|
||||
|
|
|
|||
|
|
@ -1,6 +1,6 @@
|
|||
# Contributing to KTX
|
||||
# Contributing to ktx
|
||||
|
||||
Thanks for your interest in KTX. This page covers **how to contribute** and
|
||||
Thanks for your interest in **ktx**. This page covers **how to contribute** and
|
||||
the **contributor rewards program**. For development setup, repository
|
||||
layout, and verification commands, see the
|
||||
[Contributing guide in the docs](https://docs.kaelio.com/ktx/docs/community/contributing).
|
||||
|
|
@ -23,7 +23,7 @@ layout, and verification commands, see the
|
|||
## Contributor rewards program
|
||||
|
||||
We send merch to contributors whose pull requests get merged. The goal is
|
||||
to thank the people building KTX with us, not to drive volume.
|
||||
to thank the people building **ktx** with us, not to drive volume.
|
||||
|
||||
### How it works
|
||||
|
||||
|
|
@ -76,7 +76,7 @@ See the [Community & Support](https://docs.kaelio.com/ktx/docs/community/support
|
|||
page for the full guide. The short version:
|
||||
|
||||
- **Questions, "how do I...", setup help, sharing patterns**: join the
|
||||
[KTX Slack](https://join.slack.com/t/ktxcommunity/shared_invite/zt-3y9b44m1x-LVyNNJD5nwaZHq4XS29LMQ).
|
||||
[**ktx** Slack](https://join.slack.com/t/ktxcommunity/shared_invite/zt-3y9b44m1x-LVyNNJD5nwaZHq4XS29LMQ).
|
||||
- **Bugs**: use the [Bug report](.github/ISSUE_TEMPLATE/bug_report.yml)
|
||||
template.
|
||||
- **Feature requests**: use the
|
||||
|
|
@ -87,7 +87,7 @@ page for the full guide. The short version:
|
|||
|
||||
## Code of conduct
|
||||
|
||||
KTX follows the
|
||||
**ktx** follows the
|
||||
[Contributor Covenant](https://www.contributor-covenant.org/version/2/1/code_of_conduct/).
|
||||
Be respectful, assume good intent, and keep discussion focused on the
|
||||
project. Report concerns to the maintainers in Slack or by email at
|
||||
|
|
|
|||
34
README.md
34
README.md
|
|
@ -13,16 +13,20 @@
|
|||
<a href="https://docs.kaelio.com/ktx/docs/"><img src="https://img.shields.io/badge/docs-ktx-22c55e?style=flat-square" alt="Documentation" /></a>
|
||||
<a href="https://join.slack.com/t/ktxcommunity/shared_invite/zt-3y9b44m1x-LVyNNJD5nwaZHq4XS29LMQ"><img src="https://img.shields.io/badge/slack-join%20community-4A154B?style=flat-square&logo=slack&logoColor=white" alt="Join the ktx Slack community" /></a>
|
||||
<a href="https://github.com/Kaelio/ktx/blob/main/LICENSE"><img src="https://img.shields.io/badge/license-Apache%202.0-blue?style=flat-square" alt="License" /></a>
|
||||
<a href="https://www.ycombinator.com/companies?batch=P25"><img src="https://img.shields.io/badge/Y%20Combinator-P25-orange?style=flat-square" alt="Y Combinator P25" /></a>
|
||||
<a href="https://www.ycombinator.com/companies/kaelio"><img src="https://img.shields.io/badge/Y%20Combinator-P25-orange?style=flat-square" alt="Y Combinator P25" /></a>
|
||||
</p>
|
||||
|
||||
<p align="center">
|
||||
<a href="https://docs.kaelio.com/ktx/docs/getting-started/quickstart"><b>Quickstart</b></a> ·
|
||||
<a href="https://docs.kaelio.com/ktx/docs/cli-reference/ktx"><b>CLI Reference</b></a> ·
|
||||
<a href="https://docs.kaelio.com/ktx/docs/ai-resources/agent-quickstart"><b>Agent Setup</b></a> ·
|
||||
<a href="https://docs.kaelio.com/ktx/docs/community/ai-resources"><b>Agent Setup</b></a> ·
|
||||
<a href="https://join.slack.com/t/ktxcommunity/shared_invite/zt-3y9b44m1x-LVyNNJD5nwaZHq4XS29LMQ"><b>Slack</b></a>
|
||||
</p>
|
||||
|
||||
<p align="center">
|
||||
<sub>Built and maintained by <a href="https://www.kaelio.com"><b>Kaelio</b></a></sub>
|
||||
</p>
|
||||
|
||||
---
|
||||
|
||||
**ktx** is a self-improving context layer that teaches agents how to query your
|
||||
|
|
@ -139,6 +143,14 @@ Agent integration ready: yes (codex:project)
|
|||
> If `ktx status` prints `ktx mcp start --project-dir ...`, run it before
|
||||
> opening your agent client.
|
||||
|
||||
## Upgrading
|
||||
|
||||
Re-run the global install with the `@latest` tag:
|
||||
|
||||
```bash
|
||||
npm install -g @kaelio/ktx@latest
|
||||
```
|
||||
|
||||
## First commands
|
||||
|
||||
| Command | Purpose |
|
||||
|
|
@ -197,7 +209,7 @@ then the current directory. Pass `--project-dir <path>` when scripting.
|
|||
- [The Context Layer](https://docs.kaelio.com/ktx/docs/concepts/the-context-layer)
|
||||
- [Building Context](https://docs.kaelio.com/ktx/docs/guides/building-context)
|
||||
- [CLI Reference](https://docs.kaelio.com/ktx/docs/cli-reference/ktx)
|
||||
- [Agent Quickstart](https://docs.kaelio.com/ktx/docs/ai-resources/agent-quickstart)
|
||||
- [AI Resources](https://docs.kaelio.com/ktx/docs/community/ai-resources)
|
||||
- [Community & Support](https://docs.kaelio.com/ktx/docs/community/support)
|
||||
|
||||
## Community
|
||||
|
|
@ -247,11 +259,17 @@ uv run pytest -q
|
|||
|
||||
## Telemetry
|
||||
|
||||
**ktx** collects anonymous usage telemetry from interactive CLI runs to
|
||||
improve setup, command reliability, and data-agent workflows. No file paths,
|
||||
hostnames, SQL, schema names, error messages, or argv are recorded. See
|
||||
[Telemetry](https://docs.kaelio.com/ktx/docs/community/telemetry) for the
|
||||
event catalog and opt-out options.
|
||||
**ktx** collects privacy-conscious usage telemetry to understand installs and
|
||||
improve setup, command reliability, and data-agent workflows. Catalog telemetry
|
||||
events do not record file paths, hostnames, SQL, schema names, table names,
|
||||
column names, error messages, raw environment values, or argv. Error reports use
|
||||
PostHog Error Tracking and can include stack frames and raw error messages,
|
||||
which may contain local file paths or the local username in those paths.
|
||||
**ktx** redacts secrets, credentials, database URLs, auth headers, argv, raw
|
||||
environment values, SQL text, row data, and user-typed prompt or MCP argument
|
||||
text from the explicit `$exception` payload. See
|
||||
[Telemetry](https://docs.kaelio.com/ktx/docs/community/telemetry) for the event
|
||||
catalog and opt-out options.
|
||||
|
||||
## License
|
||||
|
||||
|
|
|
|||
|
|
@ -2,20 +2,20 @@
|
|||
|
||||
## Reporting a vulnerability
|
||||
|
||||
If you believe you've found a security vulnerability in KTX, please report it
|
||||
If you believe you've found a security vulnerability in **ktx**, please report it
|
||||
**privately** through GitHub Security Advisories:
|
||||
|
||||
[Report a vulnerability](https://github.com/Kaelio/ktx/security/advisories/new)
|
||||
|
||||
If you cannot use GitHub Security Advisories, email `support@kaelio.com`
|
||||
instead. Please do **not** open a public issue, post in the KTX Slack, or
|
||||
instead. Please do **not** open a public issue, post in the **ktx** Slack, or
|
||||
share details elsewhere until we have published a fix.
|
||||
|
||||
When reporting, please include:
|
||||
|
||||
- A description of the issue and its impact
|
||||
- Steps to reproduce
|
||||
- The KTX version affected
|
||||
- The **ktx** version affected
|
||||
|
||||
## What to expect
|
||||
|
||||
|
|
|
|||
File diff suppressed because one or more lines are too long
|
Before Width: | Height: | Size: 63 KiB After Width: | Height: | Size: 64 KiB |
|
|
@ -869,6 +869,97 @@ body::after {
|
|||
50% { opacity: 0.65; transform: scale(0.9); }
|
||||
}
|
||||
|
||||
/* ═══════════════════════════════════════════
|
||||
GitHub star widget (sidebar footer pill)
|
||||
Rendered as the `icon` of a fumadocs icon-link, so it sits in the footer
|
||||
pill beside the Slack mark and the theme toggle. GitHub mark + star glyph
|
||||
+ live count; the star rotates to coral on hover. The !important sizes win
|
||||
over fumadocs' `[&_svg]:size-4.5` rule on the wrapping link.
|
||||
═══════════════════════════════════════════ */
|
||||
.ktx-stars {
|
||||
display: inline-flex;
|
||||
align-items: center;
|
||||
gap: 6px;
|
||||
font-family: var(--font-display), var(--font-sans), sans-serif;
|
||||
font-size: 13px;
|
||||
line-height: 1;
|
||||
}
|
||||
|
||||
/* Push the stars to the opposite (right) end of the footer pill, leaving the
|
||||
Slack mark on the left — like justify-content: space-between. The auto margin
|
||||
absorbs the pill's free space; we cancel the theme toggle's own ms-auto so
|
||||
that single gap lands before the stars, not between stars and the toggle. */
|
||||
#nd-sidebar a[aria-label="Star ktx on GitHub"] {
|
||||
margin-inline-start: auto;
|
||||
}
|
||||
|
||||
#nd-sidebar [data-theme-toggle] {
|
||||
margin-inline-start: 0;
|
||||
}
|
||||
|
||||
.ktx-stars-gh {
|
||||
width: 16px !important;
|
||||
height: 16px !important;
|
||||
flex-shrink: 0;
|
||||
}
|
||||
|
||||
.ktx-stars-count-wrap {
|
||||
display: inline-flex;
|
||||
align-items: center;
|
||||
gap: 4px;
|
||||
}
|
||||
|
||||
.ktx-stars-star {
|
||||
width: 12px !important;
|
||||
height: 12px !important;
|
||||
flex-shrink: 0;
|
||||
fill: currentColor;
|
||||
opacity: 0.7;
|
||||
transition:
|
||||
transform 0.3s var(--ktx-ease),
|
||||
fill 0.3s var(--ktx-ease),
|
||||
opacity 0.3s var(--ktx-ease);
|
||||
}
|
||||
|
||||
/* The wrapping fumadocs link owns the hover; rotate + colour the star from it. */
|
||||
#nd-sidebar a:hover .ktx-stars-star {
|
||||
transform: rotate(-14deg) scale(1.12);
|
||||
fill: var(--ktx-coral);
|
||||
opacity: 1;
|
||||
}
|
||||
|
||||
.ktx-stars-count {
|
||||
font-weight: 600;
|
||||
font-variant-numeric: tabular-nums;
|
||||
letter-spacing: -0.01em;
|
||||
}
|
||||
|
||||
/* Skeleton shown only on the rare cold (uncached) fetch */
|
||||
.ktx-stars-skeleton-bar {
|
||||
display: inline-block;
|
||||
width: 26px;
|
||||
height: 10px;
|
||||
border-radius: 4px;
|
||||
background: linear-gradient(
|
||||
90deg,
|
||||
var(--color-fd-muted) 25%,
|
||||
color-mix(in oklch, var(--color-fd-muted-foreground) 28%, var(--color-fd-muted)) 50%,
|
||||
var(--color-fd-muted) 75%
|
||||
);
|
||||
background-size: 200% 100%;
|
||||
animation: ktx-stars-shimmer 1.4s ease-in-out infinite;
|
||||
}
|
||||
|
||||
@keyframes ktx-stars-shimmer {
|
||||
from { background-position: 200% 0; }
|
||||
to { background-position: -200% 0; }
|
||||
}
|
||||
|
||||
@media (prefers-reduced-motion: reduce) {
|
||||
#nd-sidebar a:hover .ktx-stars-star { transform: none; }
|
||||
.ktx-stars-skeleton-bar { animation: none; }
|
||||
}
|
||||
|
||||
/* Dot grid */
|
||||
.dot-grid {
|
||||
background-image: radial-gradient(
|
||||
|
|
|
|||
|
|
@ -1,22 +1,22 @@
|
|||
import type { BaseLayoutProps } from "fumadocs-ui/layouts/shared";
|
||||
import { GitHubIcon } from "@/components/github-icon";
|
||||
import { Logo } from "@/components/logo";
|
||||
import { SlackIcon } from "@/components/slack-icon";
|
||||
import { GitHubStars, GITHUB_REPO_URL } from "@/components/github-stars";
|
||||
import { ThemeToggle } from "@/components/theme-toggle";
|
||||
|
||||
export const baseOptions: BaseLayoutProps = {
|
||||
nav: {
|
||||
title: <Logo />,
|
||||
title: Logo,
|
||||
transparentMode: "top",
|
||||
},
|
||||
// Custom two-icon switcher (light / dark) where each icon selects its own
|
||||
// theme. The default "light-dark" switcher is a single blind toggle — both
|
||||
// icons just flip the theme, so clicking the sun while already in light mode
|
||||
// jumps to dark, which reads as broken.
|
||||
slots: {
|
||||
themeSwitch: ThemeToggle,
|
||||
},
|
||||
links: [
|
||||
{
|
||||
type: "icon",
|
||||
label: "GitHub",
|
||||
icon: <GitHubIcon />,
|
||||
text: "GitHub",
|
||||
url: "https://github.com/kaelio/ktx",
|
||||
external: true,
|
||||
},
|
||||
{
|
||||
type: "icon",
|
||||
label: "Join the ktx Slack community",
|
||||
|
|
@ -25,5 +25,13 @@ export const baseOptions: BaseLayoutProps = {
|
|||
url: "https://join.slack.com/t/ktxcommunity/shared_invite/zt-3y9b44m1x-LVyNNJD5nwaZHq4XS29LMQ",
|
||||
external: true,
|
||||
},
|
||||
{
|
||||
type: "icon",
|
||||
label: "Star ktx on GitHub",
|
||||
icon: <GitHubStars />,
|
||||
text: "GitHub",
|
||||
url: GITHUB_REPO_URL,
|
||||
external: true,
|
||||
},
|
||||
],
|
||||
};
|
||||
|
|
|
|||
|
|
@ -305,8 +305,8 @@ export const runtimeEdges: Edge[] = [
|
|||
sourceHandle: "to-context",
|
||||
target: "context",
|
||||
targetHandle: "in",
|
||||
type: "default",
|
||||
label: "search",
|
||||
type: "smoothstep",
|
||||
label: "search + read",
|
||||
...labelBg,
|
||||
style: edgeStyle,
|
||||
markerStart: marker,
|
||||
|
|
@ -318,7 +318,7 @@ export const runtimeEdges: Edge[] = [
|
|||
sourceHandle: "to-warehouse",
|
||||
target: "warehouse",
|
||||
targetHandle: "in",
|
||||
type: "default",
|
||||
type: "smoothstep",
|
||||
label: "read-only",
|
||||
...labelBg,
|
||||
style: edgeStyle,
|
||||
|
|
|
|||
82
docs-site/components/github-stars.tsx
Normal file
82
docs-site/components/github-stars.tsx
Normal file
|
|
@ -0,0 +1,82 @@
|
|||
import { Suspense } from "react";
|
||||
import { GitHubIcon } from "@/components/github-icon";
|
||||
|
||||
const REPO = "kaelio/ktx";
|
||||
export const GITHUB_REPO_URL = `https://github.com/${REPO}`;
|
||||
const API_URL = `https://api.github.com/repos/${REPO}`;
|
||||
|
||||
/** Reads the repository's stargazer count from the GitHub REST API; null when it cannot be read. */
async function fetchStarCount(): Promise<number | null> {
  try {
    const response = await fetch(API_URL, {
      headers: { Accept: "application/vnd.github+json" },
      // Cached server-side and revalidated hourly: GitHub allows only 60
      // unauthenticated req/h per IP, so visitors never trigger the request.
      next: { revalidate: 3600 },
    });
    if (response.ok) {
      const payload = (await response.json()) as { stargazers_count?: unknown };
      const stars = payload.stargazers_count;
      if (typeof stars === "number") {
        return stars;
      }
    }
  } catch {
    // Network and parse failures share the "count unavailable" result below.
  }
  return null;
}
|
||||
|
||||
/** Compact count: 847 → "847", 1234 → "1.2k", 12345 → "12.3k", 1234567 → "1.2M". */
function formatStars(count: number): string {
  if (count < 1000) return count.toLocaleString("en-US");

  // One decimal below 100 units, whole numbers from 100 units up.
  const compact = (units: number): number =>
    units >= 100 ? Math.round(units) : Math.round(units * 10) / 10;

  const thousands = compact(count / 1000);
  // Rounding can reach 1000 (e.g. 999,500), which must read "1M", not "1000k".
  if (thousands < 1000) return `${thousands}k`;

  return `${compact(count / 1000000)}M`;
}
|
||||
|
||||
/** Decorative five-point star; hidden from assistive technology. */
function StarGlyph() {
  const outline =
    "M12 2.6l2.9 5.88 6.49.95-4.7 4.57 1.11 6.46L12 17.4l-5.8 3.06 1.11-6.46-4.7-4.57 6.49-.95z";
  return (
    <svg className="ktx-stars-star" viewBox="0 0 24 24" aria-hidden="true">
      <path d={outline} />
    </svg>
  );
}
|
||||
|
||||
/** Async body of the widget: GitHub mark plus the formatted count, or the word "Star" when no count is available. */
async function StarsInner() {
  const stars = await fetchStarCount();

  const label =
    stars === null ? (
      <span className="ktx-stars-count">Star</span>
    ) : (
      <span className="ktx-stars-count-wrap">
        <StarGlyph />
        <span className="ktx-stars-count">{formatStars(stars)}</span>
      </span>
    );

  return (
    <span className="ktx-stars">
      <GitHubIcon className="ktx-stars-gh" />
      {label}
    </span>
  );
}
|
||||
|
||||
/** Placeholder rendered while the star count is still loading. */
function StarsSkeleton() {
  const shimmerBar = <span className="ktx-stars-skeleton-bar" />;
  return (
    <span className="ktx-stars" aria-hidden="true">
      <GitHubIcon className="ktx-stars-gh" />
      {shimmerBar}
    </span>
  );
}
|
||||
|
||||
/**
 * GitHub mark + live star count, meant to be used as the `icon` of a fumadocs
 * icon link. Renders inner content only; the enclosing <a> comes from fumadocs.
 */
export function GitHubStars() {
  const placeholder = <StarsSkeleton />;
  return (
    <Suspense fallback={placeholder}>
      <StarsInner />
    </Suspense>
  );
}
|
||||
|
|
@ -1,40 +1,56 @@
|
|||
export function Logo() {
|
||||
"use client";
|
||||
|
||||
import Link from "next/link";
|
||||
|
||||
const brandFont = {
|
||||
fontFamily: "var(--font-display), var(--font-sans), sans-serif",
|
||||
} as const;
|
||||
|
||||
export function Logo({ href = "/", className }: { href?: string; className?: string }) {
|
||||
return (
|
||||
<div className="flex items-center gap-3.5 group">
|
||||
<div className="relative flex items-center justify-center transition-transform duration-300 ease-out group-hover:rotate-[-4deg]">
|
||||
<img
|
||||
src="/ktx/brand/ktx-mascot.svg"
|
||||
alt=""
|
||||
aria-hidden="true"
|
||||
className="h-20 w-20 object-contain block dark:hidden"
|
||||
/>
|
||||
<img
|
||||
src="/ktx/brand/ktx-mascot-dark.svg"
|
||||
alt=""
|
||||
aria-hidden="true"
|
||||
className="h-20 w-20 object-contain hidden dark:block"
|
||||
/>
|
||||
</div>
|
||||
<div className="flex flex-col items-start leading-none">
|
||||
<div className={className}>
|
||||
<div className="flex items-center gap-3.5 group">
|
||||
<Link href={href} aria-label="ktx documentation home" className="flex items-center no-underline">
|
||||
<span className="relative flex items-center justify-center transition-transform duration-300 ease-out group-hover:rotate-[-4deg]">
|
||||
<img
|
||||
src="/ktx/brand/ktx-mascot.svg"
|
||||
alt=""
|
||||
aria-hidden="true"
|
||||
className="h-20 w-20 object-contain block dark:hidden"
|
||||
/>
|
||||
<img
|
||||
src="/ktx/brand/ktx-mascot-dark.svg"
|
||||
alt=""
|
||||
aria-hidden="true"
|
||||
className="h-20 w-20 object-contain hidden dark:block"
|
||||
/>
|
||||
</span>
|
||||
</Link>
|
||||
<div className="flex flex-col items-start leading-none">
|
||||
<Link
|
||||
href={href}
|
||||
className="text-[42px] font-semibold text-fd-foreground tracking-tight no-underline"
|
||||
style={brandFont}
|
||||
>
|
||||
ktx
|
||||
</Link>
|
||||
<a
|
||||
href="https://www.kaelio.com"
|
||||
target="_blank"
|
||||
rel="noreferrer"
|
||||
className="mt-1 whitespace-nowrap text-[13px] font-medium text-fd-muted-foreground/80 tracking-tight no-underline transition-colors hover:text-fd-foreground"
|
||||
style={brandFont}
|
||||
>
|
||||
by Kaelio
|
||||
</a>
|
||||
</div>
|
||||
<span
|
||||
className="text-[42px] font-semibold text-fd-foreground tracking-tight"
|
||||
style={{ fontFamily: "var(--font-display), var(--font-sans), sans-serif" }}
|
||||
className="text-[19px] font-medium text-fd-muted-foreground/80 tracking-tight border-l border-fd-border pl-3 ml-1"
|
||||
style={brandFont}
|
||||
>
|
||||
ktx
|
||||
</span>
|
||||
<span
|
||||
className="mt-1 whitespace-nowrap text-[13px] font-medium text-fd-muted-foreground/80 tracking-tight"
|
||||
style={{ fontFamily: "var(--font-display), var(--font-sans), sans-serif" }}
|
||||
>
|
||||
by Kaelio
|
||||
Docs
|
||||
</span>
|
||||
</div>
|
||||
<span
|
||||
className="text-[19px] font-medium text-fd-muted-foreground/80 tracking-tight border-l border-fd-border pl-3 ml-1"
|
||||
style={{ fontFamily: "var(--font-display), var(--font-sans), sans-serif" }}
|
||||
>
|
||||
Docs
|
||||
</span>
|
||||
</div>
|
||||
);
|
||||
}
|
||||
|
|
|
|||
576
docs-site/components/product-runtime.tsx
Normal file
576
docs-site/components/product-runtime.tsx
Normal file
|
|
@ -0,0 +1,576 @@
|
|||
"use client";
|
||||
|
||||
import {
|
||||
type Edge,
|
||||
type EdgeProps,
|
||||
getSmoothStepPath,
|
||||
Handle,
|
||||
MarkerType,
|
||||
type Node,
|
||||
type NodeProps,
|
||||
Position,
|
||||
} from "@xyflow/react";
|
||||
|
||||
import { FlowCanvas } from "./flow-canvas";
|
||||
|
||||
type AgentNodeData = {
|
||||
title: string;
|
||||
items: string[];
|
||||
};
|
||||
|
||||
type HubNodeData = {
|
||||
title: string;
|
||||
badge: string;
|
||||
rows: string[];
|
||||
};
|
||||
|
||||
type TargetNodeData = {
|
||||
accent: string;
|
||||
title: string;
|
||||
body: string;
|
||||
rows: { text: string; color?: string; mono?: boolean }[];
|
||||
badge?: string;
|
||||
};
|
||||
|
||||
type AgentNode = Node<AgentNodeData, "agent">;
|
||||
type HubNode = Node<HubNodeData, "hub">;
|
||||
type TargetNode = Node<TargetNodeData, "target">;
|
||||
type FlowNode = AgentNode | HubNode | TargetNode;
|
||||
|
||||
const AGENT_W = 252;
|
||||
const AGENT_H = 96;
|
||||
const HUB_W = 306;
|
||||
const HUB_H = 190;
|
||||
const TARGET_W = 268;
|
||||
const TARGET_H = 148;
|
||||
|
||||
const CENTER_X = 470;
|
||||
const ROW_AGENT_Y = 0;
|
||||
const ROW_HUB_Y = 196;
|
||||
const ROW_TARGET_Y = 488;
|
||||
|
||||
const AGENT_X = CENTER_X - AGENT_W / 2;
|
||||
const HUB_X = CENTER_X - HUB_W / 2;
|
||||
|
||||
const TARGET_GAP_X = 38;
|
||||
const TARGETS_TOTAL = TARGET_W * 2 + TARGET_GAP_X;
|
||||
const TARGETS_START_X = CENTER_X - TARGETS_TOTAL / 2;
|
||||
const CONTEXT_X = TARGETS_START_X;
|
||||
const WAREHOUSE_X = TARGETS_START_X + TARGET_W + TARGET_GAP_X;
|
||||
|
||||
const EDGE_STROKE = "#94a3b8";
|
||||
const CYCLE_STROKE = "#0e7490";
|
||||
const EMERALD = "#059669";
|
||||
const TEAL = "#0e7490";
|
||||
|
||||
const nodes: FlowNode[] = [
|
||||
{
|
||||
id: "agent",
|
||||
type: "agent",
|
||||
position: { x: AGENT_X, y: ROW_AGENT_Y },
|
||||
data: {
|
||||
title: "Your agent",
|
||||
items: ["Claude Code", "Cursor", "Codex"],
|
||||
},
|
||||
draggable: false,
|
||||
selectable: false,
|
||||
},
|
||||
{
|
||||
id: "hub",
|
||||
type: "hub",
|
||||
position: { x: HUB_X, y: ROW_HUB_Y },
|
||||
data: {
|
||||
title: "ktx",
|
||||
badge: "MCP + CLI",
|
||||
rows: [
|
||||
"Search wiki + semantic layer",
|
||||
"Return approved metrics",
|
||||
"Compile metrics → SQL",
|
||||
],
|
||||
},
|
||||
draggable: false,
|
||||
selectable: false,
|
||||
},
|
||||
{
|
||||
id: "context",
|
||||
type: "target",
|
||||
position: { x: CONTEXT_X, y: ROW_TARGET_Y },
|
||||
data: {
|
||||
accent: TEAL,
|
||||
title: "Context layer",
|
||||
body: "Approved definitions agents search before they answer.",
|
||||
rows: [
|
||||
{ text: "wiki/*.md", color: EMERALD, mono: true },
|
||||
{ text: "semantic-layer/*.yaml", color: TEAL, mono: true },
|
||||
],
|
||||
},
|
||||
draggable: false,
|
||||
selectable: false,
|
||||
},
|
||||
{
|
||||
id: "warehouse",
|
||||
type: "target",
|
||||
position: { x: WAREHOUSE_X, y: ROW_TARGET_Y },
|
||||
data: {
|
||||
accent: "#334155",
|
||||
title: "Database",
|
||||
badge: "read-only",
|
||||
body: "Runs the compiled SQL. ktx never writes to it.",
|
||||
rows: [],
|
||||
},
|
||||
draggable: false,
|
||||
selectable: false,
|
||||
},
|
||||
];
|
||||
|
||||
/**
 * Label styling spread into every labelled edge: a small bordered pill drawn
 * with the docs theme's background, border and muted-foreground colours.
 */
const labelBg = {
  labelBgPadding: [6, 3] as [number, number],
  labelBgBorderRadius: 4,
  labelStyle: { fontSize: 13, fontWeight: 600, fill: "var(--color-fd-muted-foreground)" },
  labelBgStyle: {
    fill: "var(--color-fd-background)",
    stroke: "var(--color-fd-border)",
    strokeWidth: 1,
  },
};
|
||||
|
||||
/** Closed arrowhead used on the agent ↔ hub request edges, in the neutral edge colour. */
const requestMarker = { type: MarkerType.ArrowClosed, color: EDGE_STROKE, width: 16, height: 16 };
|
||||
|
||||
/**
 * Builds one of the straight agent ↔ hub edges. The same handle id is used on
 * both ends and doubles as the visible label.
 */
function makeRequestEdge(spec: { id: string; source: string; target: string; handle: string }): Edge {
  return {
    id: spec.id,
    source: spec.source,
    sourceHandle: spec.handle,
    target: spec.target,
    targetHandle: spec.handle,
    type: "straight",
    label: spec.handle,
    ...labelBg,
    style: { stroke: EDGE_STROKE, strokeWidth: 1.5 },
    markerEnd: requestMarker,
  };
}

/**
 * Builds one of the hub → target spokes: a smoothstep edge in the cycle colour
 * with an arrowhead on both ends.
 */
function makeSpokeEdge(spec: { id: string; sourceHandle: string; target: string; label: string }): Edge {
  return {
    id: spec.id,
    source: "hub",
    sourceHandle: spec.sourceHandle,
    target: spec.target,
    targetHandle: "in",
    type: "smoothstep",
    label: spec.label,
    ...labelBg,
    style: { stroke: CYCLE_STROKE, strokeWidth: 1.5 },
    markerStart: { type: MarkerType.ArrowClosed, color: CYCLE_STROKE, width: 14, height: 14 },
    markerEnd: { type: MarkerType.ArrowClosed, color: CYCLE_STROKE, width: 14, height: 14 },
  };
}

/** The four visible edges of the diagram (particle edges are declared separately). */
const flowEdges: Edge[] = [
  makeRequestEdge({ id: "e-ask", source: "agent", target: "hub", handle: "ask" }),
  makeRequestEdge({ id: "e-answer", source: "hub", target: "agent", handle: "answer" }),
  makeSpokeEdge({ id: "e-search", sourceHandle: "to-context", target: "context", label: "search + read" }),
  makeSpokeEdge({ id: "e-readonly", sourceHandle: "to-warehouse", target: "warehouse", label: "read-only" }),
];
|
||||
|
||||
/**
 * Card for the "agent" node: a robot glyph next to the title, followed by one
 * chip per entry in `data.items`. Both handles sit on the bottom edge and are
 * invisible; they only anchor the ask/answer edges.
 */
function AgentNodeView({ data }: NodeProps<AgentNode>) {
  const { title, items } = data;

  const robotGlyph = (
    <svg
      xmlns="http://www.w3.org/2000/svg"
      width="18"
      height="18"
      viewBox="0 0 24 24"
      fill="none"
      stroke="currentColor"
      strokeWidth="1.75"
      strokeLinecap="round"
      strokeLinejoin="round"
      aria-hidden="true"
    >
      <rect x="3" y="6" width="18" height="12" rx="3" />
      <circle cx="9" cy="12" r="1.25" fill="currentColor" stroke="none" />
      <circle cx="15" cy="12" r="1.25" fill="currentColor" stroke="none" />
      <path d="M12 3v3" />
    </svg>
  );

  const chips = items.map((label) => (
    <span
      key={label}
      className="rounded border border-fd-border bg-fd-background px-1.5 py-0.5 text-[12px] leading-5 text-fd-muted-foreground"
    >
      {label}
    </span>
  ));

  return (
    <div
      style={{ width: AGENT_W, height: AGENT_H }}
      className="flex flex-col justify-center rounded-md border border-fd-border bg-fd-card px-3.5 py-2.5 shadow-sm"
    >
      <Handle id="ask" type="source" position={Position.Bottom} className="!opacity-0" style={{ left: "35%" }} />
      <Handle id="answer" type="target" position={Position.Bottom} className="!opacity-0" style={{ left: "65%" }} />
      <div className="flex items-center gap-2.5">
        <span className="flex h-8 w-8 flex-none items-center justify-center rounded-full bg-fd-primary/15 text-fd-primary">
          {robotGlyph}
        </span>
        <p className="text-[17px] font-semibold leading-6 text-fd-foreground">{title}</p>
      </div>
      <div className="mt-2 flex flex-wrap gap-1.5">{chips}</div>
    </div>
  );
}
|
||||
|
||||
/**
 * Dark card for the "hub" node: a "k" mark, the title and a badge, then one
 * bulleted line per entry in `data.rows`. It carries four invisible handles:
 * ask/answer on the top edge and one spoke per target on the bottom edge.
 */
function HubNodeView({ data }: NodeProps<HubNode>) {
  const { title, badge, rows } = data;

  const bullets = rows.map((line) => (
    <div key={line} className="flex items-center gap-2.5">
      <span className="h-1.5 w-1.5 flex-none rounded-full bg-cyan-300/95" />
      <span className="text-[14px] font-medium leading-5 text-cyan-50/90">{line}</span>
    </div>
  ));

  return (
    <div
      style={{ width: HUB_W, height: HUB_H }}
      className="relative flex flex-col rounded-md border border-cyan-200/20 bg-[#0f1f23] px-4 py-3.5 text-white shadow-sm dark:bg-[#0b181b]"
    >
      {/* Top edge: request in, answer out. */}
      <Handle id="ask" type="target" position={Position.Top} className="!opacity-0" style={{ left: "37.5%" }} />
      <Handle id="answer" type="source" position={Position.Top} className="!opacity-0" style={{ left: "62.5%" }} />
      {/* Bottom edge: one spoke per target card. */}
      <Handle id="to-context" type="source" position={Position.Bottom} className="!opacity-0" style={{ left: "44%" }} />
      <Handle id="to-warehouse" type="source" position={Position.Bottom} className="!opacity-0" style={{ left: "56%" }} />
      <div className="flex items-center gap-2.5">
        <span className="flex h-7 w-7 flex-none items-center justify-center rounded-md bg-cyan-300/95 font-mono text-sm font-bold text-[#0b1c20]">
          k
        </span>
        <span className="text-[19px] font-bold leading-6 text-white">{title}</span>
        <span className="ml-1 rounded border border-cyan-200/30 bg-white/5 px-1.5 py-0.5 font-mono text-[11px] leading-5 text-cyan-100/85">
          {badge}
        </span>
      </div>
      <div className="mt-3 flex flex-1 flex-col justify-center gap-2">{bullets}</div>
    </div>
  );
}
|
||||
|
||||
/**
 * Card for a "target" node. `data.accent` colours the top border and the
 * badge text; the badge and the row list are rendered only when present, and
 * the body paragraph is clamped to two lines.
 */
function TargetNodeView({ data }: NodeProps<TargetNode>) {
  const { accent, title, badge, rows, body } = data;

  const badgeEl = badge ? (
    <span
      className="rounded-full px-1.5 py-0.5 text-[11px] font-semibold leading-5"
      style={{
        color: accent,
        background: "color-mix(in oklch, var(--color-fd-card) 86%, #64748b)",
      }}
    >
      {badge}
    </span>
  ) : null;

  const rowsEl =
    rows.length > 0 ? (
      <div className="mt-1 flex flex-col gap-0.5">
        {rows.map((entry) => (
          <span
            key={entry.text}
            className={
              entry.mono
                ? "font-mono text-[13px] font-semibold tracking-tight"
                : "text-[12px] leading-4 text-fd-muted-foreground"
            }
            style={entry.color ? { color: entry.color } : undefined}
          >
            {entry.text}
          </span>
        ))}
      </div>
    ) : null;

  return (
    <div
      style={{ width: TARGET_W, height: TARGET_H, borderTop: `3px solid ${accent}` }}
      className="overflow-hidden rounded-md border border-fd-border bg-fd-card px-3.5 py-3 shadow-sm"
    >
      <Handle id="in" type="target" position={Position.Top} className="!opacity-0" />
      <div className="flex items-center gap-2">
        <p className="text-[17px] font-semibold leading-6 text-fd-foreground">{title}</p>
        {badgeEl}
      </div>
      {rowsEl}
      <p className="mt-1.5 line-clamp-2 text-[13px] leading-[18px] text-fd-muted-foreground">{body}</p>
    </div>
  );
}
|
||||
|
||||
/* ------------------------------- Particles ------------------------------- */
|
||||
|
||||
// Particle timing: travel speed along the path, and a floor on the loop duration.
const PARTICLE_SPEED_PX_PER_SEC = 150;
const PARTICLE_MIN_DURATION_SEC = 5;

/** One leg of a particle route: a start point and end point, each with the side it attaches to. */
type Leg = {
  sx: number;
  sy: number;
  sPos: Position;
  tx: number;
  ty: number;
  tPos: Position;
};

// Handle anchor coordinates. The fractions mirror the `left` percentages set
// on the corresponding <Handle> elements in the node views.
const AGENT_BOTTOM_Y = ROW_AGENT_Y + AGENT_H;
const AGENT_ASK_X = AGENT_X + AGENT_W * 0.35;
const AGENT_ANSWER_X = AGENT_X + AGENT_W * 0.65;

const HUB_BOTTOM_Y = ROW_HUB_Y + HUB_H;
const HUB_ASK_X = HUB_X + HUB_W * 0.375;
const HUB_ANSWER_X = HUB_X + HUB_W * 0.625;
const HUB_TO_CONTEXT_X = HUB_X + HUB_W * 0.44;
const HUB_TO_WAREHOUSE_X = HUB_X + HUB_W * 0.56;

// Target handles carry no `left` override, so they are taken at the card's horizontal centre.
const CONTEXT_TOP_X = CONTEXT_X + TARGET_W / 2;
const WAREHOUSE_TOP_X = WAREHOUSE_X + TARGET_W / 2;
|
||||
|
||||
/**
 * Builds the closed route a particle follows: agent → hub → target and back.
 *
 * @param spokeX  x of the hub's bottom handle for this target.
 * @param targetX x of the target card's top handle.
 * @returns `d`, one continuous SVG path made of six smoothstep legs, and
 *          `length`, the sum of each leg's |dx| + |dy| (a Manhattan estimate
 *          of the route length, not a measured path length).
 */
function buildCyclePath(spokeX: number, targetX: number): {
  d: string;
  length: number;
} {
  const legs: Leg[] = [
    // agent → hub (ask, down)
    {
      sx: AGENT_ASK_X,
      sy: AGENT_BOTTOM_Y,
      sPos: Position.Bottom,
      tx: HUB_ASK_X,
      ty: ROW_HUB_Y,
      tPos: Position.Top,
    },
    // through the hub to its spoke handle (down, drawn behind the hub)
    {
      sx: HUB_ASK_X,
      sy: ROW_HUB_Y,
      sPos: Position.Bottom,
      tx: spokeX,
      ty: HUB_BOTTOM_Y,
      tPos: Position.Top,
    },
    // hub → target (down)
    {
      sx: spokeX,
      sy: HUB_BOTTOM_Y,
      sPos: Position.Bottom,
      tx: targetX,
      ty: ROW_TARGET_Y,
      tPos: Position.Top,
    },
    // target → hub (up)
    {
      sx: targetX,
      sy: ROW_TARGET_Y,
      sPos: Position.Top,
      tx: spokeX,
      ty: HUB_BOTTOM_Y,
      tPos: Position.Bottom,
    },
    // through the hub to its answer handle (up, drawn behind the hub)
    {
      sx: spokeX,
      sy: HUB_BOTTOM_Y,
      sPos: Position.Top,
      tx: HUB_ANSWER_X,
      ty: ROW_HUB_Y,
      tPos: Position.Bottom,
    },
    // hub → agent (answer, up)
    {
      sx: HUB_ANSWER_X,
      sy: ROW_HUB_Y,
      sPos: Position.Top,
      tx: AGENT_ANSWER_X,
      ty: AGENT_BOTTOM_Y,
      tPos: Position.Bottom,
    },
  ];

  // Every leg after the first has its leading "M" (move-to) turned into "L"
  // (line-to) so the legs join into a single unbroken path.
  const d = legs
    .map((leg, index) => {
      const [piece] = getSmoothStepPath({
        sourceX: leg.sx,
        sourceY: leg.sy,
        sourcePosition: leg.sPos,
        targetX: leg.tx,
        targetY: leg.ty,
        targetPosition: leg.tPos,
      });
      return index === 0 ? piece : piece.replace(/^M/, "L");
    })
    .join(" ");

  let length = 0;
  for (const { sx, sy, tx, ty } of legs) {
    length = length + Math.abs(tx - sx) + Math.abs(ty - sy);
  }

  return { d, length };
}
|
||||
|
||||
/** Payload carried by a particle edge; consumed by ParticleEdgeView. */
type ParticleEdgeData = {
  /** SVG path data the particle travels along. */
  d: string;
  /** Seconds for one full loop of the path. */
  duration: number;
  /** Seconds by which the animation start is shifted into the past. */
  beginOffset: number;
  /** CSS colour applied to the particle. */
  color: string;
};

/** Edge of type "particle" carrying ParticleEdgeData. */
type ParticleEdge = Edge<ParticleEdgeData, "particle">;
|
||||
|
||||
/**
 * Custom edge renderer that draws no line of its own. It emits an unpainted
 * path plus three concentric circles that loop along that path through an
 * SVG <animateMotion>. Renders nothing when the edge has no data.
 */
function ParticleEdgeView({ id, data }: EdgeProps<ParticleEdge>) {
  if (!data) return null;

  const { d, color, duration, beginOffset } = data;
  const pathId = `runtime-particle-path-${id}`;
  const dur = `${duration.toFixed(2)}s`;
  // The leading minus shifts the start into the past by the offset.
  const begin = `-${beginOffset.toFixed(2)}s`;

  return (
    <>
      <path id={pathId} d={d} fill="none" stroke="none" pointerEvents="none" />
      <g className="runtime-particle" style={{ color }}>
        <circle r={7.5} fill="currentColor" opacity={0.16} />
        <circle r={3.75} fill="currentColor" opacity={0.32} />
        <circle r={2.1} fill="currentColor" />
        <animateMotion dur={dur} begin={begin} repeatCount="indefinite">
          <mpath href={`#${pathId}`} />
        </animateMotion>
      </g>
    </>
  );
}
|
||||
|
||||
/**
 * Creates the particle edge for one target card.
 *
 * The edge is a self-loop on `source` (source and target are the same node);
 * the path the particle follows is carried in `data.d`.
 *
 * @param id            edge id.
 * @param source        node id used for both ends of the edge.
 * @param spokeX        x of the hub's bottom handle for this target.
 * @param targetX       x of the target card's top handle.
 * @param beginFraction fraction of the loop duration used as the begin offset.
 */
function makeCycleEdge(
  id: string,
  source: string,
  spokeX: number,
  targetX: number,
  beginFraction: number,
): ParticleEdge {
  const route = buildCyclePath(spokeX, targetX);
  const travelSeconds = route.length / PARTICLE_SPEED_PX_PER_SEC;
  // Short routes are held to the minimum loop duration.
  const duration = Math.max(PARTICLE_MIN_DURATION_SEC, travelSeconds);
  const beginOffset = duration * beginFraction;

  return {
    id,
    source,
    target: source,
    type: "particle",
    data: { d: route.d, duration, beginOffset, color: CYCLE_STROKE },
  };
}
|
||||
|
||||
// One looping particle per target; the warehouse particle starts half a loop in.
const particleEdges: ParticleEdge[] = [
  makeCycleEdge("p-context", "context", HUB_TO_CONTEXT_X, CONTEXT_TOP_X, 0),
  makeCycleEdge("p-warehouse", "warehouse", HUB_TO_WAREHOUSE_X, WAREHOUSE_TOP_X, 0.5),
];

// Renderer registries handed to the canvas, keyed by node `type` / edge `type`.
const nodeTypes = { agent: AgentNodeView, hub: HubNodeView, target: TargetNodeView };
const edgeTypes = { particle: ParticleEdgeView };

// Visible edges first, then the particle overlays.
const edges = [...flowEdges, ...particleEdges];
|
||||
|
||||
/**
 * "How serving works" docs section: an intro heading and paragraph, then a
 * card holding the static serving-flow diagram. The inline stylesheet gives
 * the particles a glow and hides them when the user prefers reduced motion.
 */
export function ProductRuntime() {
  const displayFont = { fontFamily: "var(--font-display)" };
  const canvasStyle = { height: "min(620px, 98vw)", minHeight: 430 };

  return (
    <section className="not-prose my-12 w-full max-w-full min-w-0 space-y-5" aria-labelledby="runtime-title">
      <div className="max-w-3xl">
        <h2
          id="runtime-title"
          className="text-xl font-semibold tracking-normal text-fd-foreground sm:text-2xl"
          style={displayFont}
        >
          How serving works
        </h2>
        <p className="mt-3 text-sm leading-6 text-fd-muted-foreground">
          At runtime, agents reach ktx through MCP. ktx searches the context
          layer, returns approved metrics, and compiles them into read-only SQL
          the warehouse runs.
        </p>
      </div>

      <article
        className="max-w-full min-w-0 overflow-hidden rounded-lg border border-fd-border bg-fd-card shadow-sm"
        aria-label="ktx serving flow from an agent request to a governed answer"
      >
        <div className="border-b border-fd-border bg-fd-muted/35 px-5 py-4">
          <p className="text-xs font-semibold uppercase tracking-wide text-fd-primary">
            Serving flow
          </p>
          <h3
            className="mt-1 text-base font-semibold tracking-normal text-fd-foreground sm:text-lg"
            style={displayFont}
          >
            From an agent request to a governed answer
          </h3>
          <p className="mt-2 max-w-3xl text-xs leading-5 text-fd-muted-foreground">
            The agent asks in plain language. ktx is the only thing that touches
            the context layer and the warehouse, and every database connection
            is read-only.
          </p>
        </div>

        <FlowCanvas
          nodes={nodes}
          edges={edges}
          nodeTypes={nodeTypes}
          edgeTypes={edgeTypes}
          canvasStyle={canvasStyle}
          className="runtime-canvas"
          fitViewOptions={{ padding: 0.06 }}
          ariaLabel="ktx serving flow diagram"
        />
      </article>
      <style>{`
        .runtime-canvas .runtime-particle {
          pointer-events: none;
          filter: drop-shadow(0 0 6px currentColor);
        }
        @media (prefers-reduced-motion: reduce) {
          .runtime-canvas .runtime-particle {
            display: none;
          }
        }
      `}</style>
    </section>
  );
}
|
||||
97
docs-site/components/theme-toggle.tsx
Normal file
97
docs-site/components/theme-toggle.tsx
Normal file
|
|
@ -0,0 +1,97 @@
|
|||
"use client";
|
||||
|
||||
import { useEffect, useState, type ComponentProps, type SVGProps } from "react";
|
||||
import { useTheme } from "fumadocs-ui/provider/base";
|
||||
|
||||
/**
|
||||
* Two-icon theme switcher (light / dark), each icon selecting its own theme —
|
||||
* unlike fumadocs' default "light-dark" switcher, which is a single blind
|
||||
* toggle that flips on any click. Dropped into the sidebar footer pill via
|
||||
* `slots.themeSwitch`, so fumadocs passes the container className (left
|
||||
* divider, `ms-auto`, rounded inner buttons); we merge it onto our own base.
|
||||
*
|
||||
* Icons are inlined (the project doesn't depend on `lucide-react` directly);
|
||||
* `useTheme` is re-exported by fumadocs so we avoid a bare `next-themes` import.
|
||||
*/
|
||||
/** Path data for the eight rays of the sun glyph, in draw order. */
const SUN_RAY_PATHS = [
  "M12 2v2",
  "M12 20v2",
  "m4.93 4.93 1.41 1.41",
  "m17.66 17.66 1.41 1.41",
  "M2 12h2",
  "M20 12h2",
  "m6.34 17.66-1.41 1.41",
  "m19.07 4.93-1.41 1.41",
];

/**
 * Inline sun glyph (a disc with eight rays) for the light-theme button.
 * Caller props are spread last, so they override the defaults set here.
 */
function SunIcon(props: SVGProps<SVGSVGElement>) {
  return (
    <svg
      viewBox="0 0 24 24"
      fill="none"
      stroke="currentColor"
      strokeWidth={2}
      strokeLinecap="round"
      strokeLinejoin="round"
      aria-hidden="true"
      {...props}
    >
      <circle cx="12" cy="12" r="4" />
      {SUN_RAY_PATHS.map((ray) => (
        <path key={ray} d={ray} />
      ))}
    </svg>
  );
}
|
||||
|
||||
/**
 * Inline crescent-moon glyph for the dark-theme button. Caller props are
 * spread last, so they override the defaults set here.
 */
function MoonIcon(props: SVGProps<SVGSVGElement>) {
  const crescent = <path d="M12 3a6 6 0 0 0 9 9 9 9 0 1 1-9-9Z" />;

  return (
    <svg
      viewBox="0 0 24 24"
      fill="none"
      stroke="currentColor"
      strokeWidth={2}
      strokeLinecap="round"
      strokeLinejoin="round"
      aria-hidden="true"
      {...props}
    >
      {crescent}
    </svg>
  );
}
|
||||
|
||||
/** Theme keys paired with their icons, in the order the buttons are rendered. */
const OPTIONS = [
  ["light", SunIcon],
  ["dark", MoonIcon],
] as const;
|
||||
|
||||
/**
 * Joins class names with single spaces, dropping every falsy entry
 * (`false`, `undefined`, and the empty string).
 */
function cx(...classes: (string | false | undefined)[]): string {
  let joined = "";
  for (const entry of classes) {
    if (!entry) continue;
    joined = joined === "" ? entry : `${joined} ${entry}`;
  }
  return joined;
}
|
||||
|
||||
/**
 * Two-button theme switcher: one button per theme, each selecting its own
 * theme when clicked.
 *
 * The active theme is read only after mount. Until then no button is marked
 * active, so the first client render matches the server-rendered markup.
 * The active button is highlighted visually and also reports
 * `aria-pressed="true"`, so assistive technology can tell which theme is
 * selected rather than relying on colour alone.
 *
 * @param className extra classes merged onto the container's base classes.
 * @param props     remaining `div` props, spread onto the container last.
 */
export function ThemeToggle({ className, ...props }: ComponentProps<"div">) {
  const { setTheme, resolvedTheme } = useTheme();
  const [mounted, setMounted] = useState(false);
  useEffect(() => setMounted(true), []);
  // `null` before mount means no button is treated as active.
  const active = mounted ? resolvedTheme : null;

  return (
    <div
      className={cx("inline-flex items-center overflow-hidden border", className)}
      data-theme-toggle=""
      {...props}
    >
      {OPTIONS.map(([key, Icon]) => {
        const isActive = active === key;
        return (
          <button
            key={key}
            type="button"
            aria-label={key}
            aria-pressed={isActive}
            onClick={() => setTheme(key)}
            className={cx(
              "size-6.5 p-1.5 transition-colors",
              isActive
                ? "bg-fd-accent text-fd-accent-foreground"
                : "text-fd-muted-foreground hover:text-fd-accent-foreground",
            )}
          >
            <Icon className="size-full" />
          </button>
        );
      })}
    </div>
  );
}
|
||||
|
|
@ -1,40 +0,0 @@
|
|||
---
|
||||
title: Agent Instructions
|
||||
description: Suggested instructions for coding assistants that need to read and cite ktx docs.
|
||||
---
|
||||
|
||||
Use these instructions when a coding assistant needs to answer questions from the **ktx** documentation.
|
||||
|
||||
```text
|
||||
When answering ktx docs questions:
|
||||
|
||||
1. Start with https://docs.kaelio.com/ktx/llms.txt.
|
||||
2. Fetch the smallest relevant Markdown page from the index.
|
||||
3. Prefer /docs/<path>.md over rendered HTML.
|
||||
4. Use https://docs.kaelio.com/ktx/llms-full.txt only when the task needs broad docs context.
|
||||
5. Quote commands exactly from docs pages.
|
||||
6. If docs and local repository behavior disagree, say what differs and prefer local verified output for code changes.
|
||||
```
|
||||
|
||||
## What this is for
|
||||
|
||||
This page is for documentation consumption only:
|
||||
|
||||
- answering questions about **ktx**
|
||||
- finding the right docs page
|
||||
- citing setup or CLI guidance
|
||||
- helping an assistant avoid stale or invented commands
|
||||
|
||||
It does not describe local tool configuration.
|
||||
|
||||
## Minimal project prompt
|
||||
|
||||
```text
|
||||
You are helping with ktx. Read https://docs.kaelio.com/ktx/llms.txt first, then fetch only the Markdown pages needed for the task. Do not scrape the rendered docs site when a .md route exists.
|
||||
```
|
||||
|
||||
## Repository prompt
|
||||
|
||||
```text
|
||||
Before editing ktx docs, read /llms.txt and the affected .md docs pages. Keep AI Resources focused on docs consumption. After editing, verify /llms.txt, /llms-full.txt, and any changed .md routes.
|
||||
```
|
||||
|
|
@ -1,54 +0,0 @@
|
|||
---
|
||||
title: Agent Quickstart
|
||||
description: A task-first route for coding agents that need to understand ktx docs.
|
||||
---
|
||||
|
||||
This page is for coding assistants reading or citing the **ktx** docs. It is intentionally limited to documentation lookup, docs navigation, and safe command discovery.
|
||||
|
||||
For Markdown endpoints, use [Markdown Access](/docs/ai-resources/markdown-access).
|
||||
For reusable task prompts, use [Prompt Recipes](/docs/ai-resources/prompt-recipes).
|
||||
To install **ktx** into an agent client, use [Agent Clients](/docs/integrations/agent-clients).
|
||||
|
||||
## First read
|
||||
|
||||
Agents should start with the smallest source that answers the task:
|
||||
|
||||
1. [`/llms.txt`](/llms.txt) - discover the docs and preferred entry points.
|
||||
2. The relevant per-page Markdown URL, for example `/docs/getting-started/quickstart.md`.
|
||||
3. [`/llms-full.txt`](/llms-full.txt) - use only when the task needs broad context across many pages.
|
||||
|
||||
## Task router
|
||||
|
||||
| User asks the agent to explain... | Read first | Then read |
|
||||
|------------------------------------|------------|-----------|
|
||||
| What **ktx** does | [Introduction](/docs/getting-started/introduction) | [The Context Layer](/docs/concepts/the-context-layer) |
|
||||
| How to start from a checkout | [Quickstart](/docs/getting-started/quickstart) | [ktx setup](/docs/cli-reference/ktx-setup) |
|
||||
| How to check project readiness | [ktx status](/docs/cli-reference/ktx-status) | [Quickstart](/docs/getting-started/quickstart) |
|
||||
| How context gets built | [Building Context](/docs/guides/building-context) | [ktx ingest](/docs/cli-reference/ktx-ingest) |
|
||||
| How semantic YAML works | [Writing Context](/docs/guides/writing-context) | [ktx sl](/docs/cli-reference/ktx-sl) |
|
||||
| How machine-readable CLI output is shaped | [ktx sl](/docs/cli-reference/ktx-sl) | [ktx wiki](/docs/cli-reference/ktx-wiki) |
|
||||
|
||||
## Operating workflow
|
||||
|
||||
Use this workflow when the user asks an assistant to answer a **ktx** docs question:
|
||||
|
||||
1. Read [`/llms.txt`](/llms.txt).
|
||||
2. Pick the smallest relevant `.md` page.
|
||||
3. Use [`/llms-full.txt`](/llms-full.txt) only if the answer needs multiple sections of the docs.
|
||||
4. Quote commands exactly from the docs page.
|
||||
5. If a command affects a local project, ask the user before assuming credentials or live services are available.
|
||||
|
||||
## Docs lookup from a shell
|
||||
|
||||
```bash
|
||||
curl https://docs.kaelio.com/ktx/llms.txt
|
||||
curl https://docs.kaelio.com/ktx/docs/getting-started/quickstart.md
|
||||
```
|
||||
|
||||
## Guardrails
|
||||
|
||||
- Do not invent CLI flags. Fetch the relevant CLI reference page.
|
||||
- Do not scrape rendered HTML when a `.md` route exists.
|
||||
- Do not assume docs lookup requires agent-client configuration.
|
||||
- Do not include credentials or secrets in prompts, URLs, or copied docs snippets.
|
||||
- When docs and local CLI behavior disagree, prefer the local CLI output and mention the mismatch.
|
||||
|
|
@ -1,76 +0,0 @@
|
|||
---
|
||||
title: Markdown Access
|
||||
description: Fetch ktx docs as llms.txt, llms-full.txt, or per-page Markdown.
|
||||
---
|
||||
|
||||
**ktx** docs are available as plain Markdown so assistants do not need to parse the rendered HTML site.
|
||||
|
||||
## Index
|
||||
|
||||
Fetch the curated index:
|
||||
|
||||
```text
|
||||
https://docs.kaelio.com/ktx/llms.txt
|
||||
```
|
||||
|
||||
Use this file to discover high-value pages, task-specific entry points, and Markdown URLs.
|
||||
|
||||
## Full corpus
|
||||
|
||||
Fetch the complete docs corpus:
|
||||
|
||||
```text
|
||||
https://docs.kaelio.com/ktx/llms-full.txt
|
||||
```
|
||||
|
||||
Use this when an assistant needs broad context across setup, concepts, CLI reference, integrations, and troubleshooting. Prefer the smaller per-page Markdown route for narrow tasks.
|
||||
|
||||
## Per-page Markdown
|
||||
|
||||
Every docs page has a Markdown route:
|
||||
|
||||
```text
|
||||
https://docs.kaelio.com/ktx/docs/getting-started/quickstart.md
|
||||
https://docs.kaelio.com/ktx/docs/cli-reference/ktx-sl.md
|
||||
https://docs.kaelio.com/ktx/docs/cli-reference/ktx-wiki.md
|
||||
https://docs.kaelio.com/ktx/docs/guides/building-context.md
|
||||
```
|
||||
|
||||
Requests that ask for Markdown can also use the normal docs URL with `Accept: text/markdown`:
|
||||
|
||||
```bash
|
||||
curl -H "Accept: text/markdown" https://docs.kaelio.com/ktx/docs/getting-started/quickstart
|
||||
```
|
||||
|
||||
## Recommended retrieval order
|
||||
|
||||
1. Fetch `/llms.txt`.
|
||||
2. Select one or two relevant page Markdown URLs.
|
||||
3. Fetch `/llms-full.txt` only when page-level docs are not enough.
|
||||
|
||||
## Output contract
|
||||
|
||||
Markdown responses are designed for agent consumption:
|
||||
|
||||
- Frontmatter is removed.
|
||||
- Each page includes a title, description, canonical URL, and Markdown URL.
|
||||
- Code blocks stay as code blocks.
|
||||
- Tables stay as Markdown tables.
|
||||
- Missing docs pages return a plain-text `404` instead of silently falling back to HTML.
|
||||
|
||||
## Page actions
|
||||
|
||||
Rendered docs pages include page-level actions near the title:
|
||||
|
||||
- **Copy MD** copies the generated Markdown for the current page.
|
||||
- **View MD** opens the generated Markdown route.
|
||||
- **Copy MDX** copies the source MDX for the current page.
|
||||
|
||||
## Common mistakes
|
||||
|
||||
| Mistake | Better path |
|
||||
|---------|-------------|
|
||||
| Scraping the HTML page for a docs answer | Fetch the `.md` route instead |
|
||||
| Loading `/llms-full.txt` for a single CLI flag lookup | Fetch the relevant CLI reference page |
|
||||
| Treating `/llms.txt` as complete documentation | Use it as an index, then fetch linked pages |
|
||||
| Copying rendered text by hand | Use **Copy MD** or **Copy MDX** from the page actions |
|
||||
|
|
@ -1,10 +0,0 @@
|
|||
{
|
||||
"title": "AI Resources",
|
||||
"defaultOpen": true,
|
||||
"pages": [
|
||||
"agent-quickstart",
|
||||
"markdown-access",
|
||||
"agent-instructions",
|
||||
"prompt-recipes"
|
||||
]
|
||||
}
|
||||
|
|
@ -1,55 +0,0 @@
|
|||
---
|
||||
title: Prompt Recipes
|
||||
description: Copyable prompts for common ktx agent workflows.
|
||||
---
|
||||
|
||||
Use these prompts when asking a coding assistant to work with **ktx**. Replace project names, connection ids, and business terms with your own values.
|
||||
|
||||
## Learn the docs
|
||||
|
||||
```text
|
||||
Read https://docs.kaelio.com/ktx/llms.txt first. Then fetch only the ktx Markdown pages needed for this task. Do not scrape rendered HTML unless no Markdown route exists.
|
||||
```
|
||||
|
||||
## Set up a project
|
||||
|
||||
```text
|
||||
Run npx skills add Kaelio/ktx --skill ktx and use the ktx skill to install
|
||||
and configure ktx in this project.
|
||||
```
|
||||
|
||||
## Find a command
|
||||
|
||||
```text
|
||||
Find the correct ktx command for this task: <task>. Start with /llms.txt, then fetch the smallest relevant CLI reference .md page. Quote the exact command and flags from the docs.
|
||||
```
|
||||
|
||||
## Explain setup
|
||||
|
||||
```text
|
||||
Explain how to set up ktx for this repo. Read /docs/getting-started/quickstart.md and the relevant CLI reference pages. Summarize prerequisites, commands, generated files, and any credentials the user must provide manually.
|
||||
```
|
||||
|
||||
## Compare concepts
|
||||
|
||||
```text
|
||||
Explain the difference between these ktx concepts: <concepts>. Start from /llms.txt, fetch the relevant concept and guide pages as Markdown, and answer with links to the source pages.
|
||||
```
|
||||
|
||||
## Review semantic changes
|
||||
|
||||
```text
|
||||
Review the ktx semantic-layer and knowledge changes in this branch. Check that measures have clear definitions, joins use valid keys, hidden/internal columns are not exposed to agents, and validation passes. List concrete file and line issues first.
|
||||
```
|
||||
|
||||
## Copy exact docs source
|
||||
|
||||
```text
|
||||
Open the relevant ktx docs page and use the page action to copy the generated Markdown or source MDX. Preserve code fences and tables exactly.
|
||||
```
|
||||
|
||||
## Update docs
|
||||
|
||||
```text
|
||||
Update the ktx docs for agent readability. Keep AI Resources focused on docs consumption. After editing, verify /llms.txt, /llms-full.txt, and the affected .md routes.
|
||||
```
|
||||
|
|
@ -48,6 +48,11 @@ directory. Use it from any directory to generate editor or agent schema files.
|
|||
| `stop` | Stop the **ktx** daemon |
|
||||
| `status` | Show managed Python runtime status and readiness checks |
|
||||
|
||||
`install` is self-contained: **ktx** downloads its own pinned, checksum-verified
|
||||
`uv` build under the runtime root and uses it to provision Python and the
|
||||
runtime wheel. Nothing needs to be installed on `PATH` first; the host only
|
||||
needs network access to `github.com` during the first install.
|
||||
|
||||
## `admin runtime` Options
|
||||
|
||||
| Flag | Description | Default |
|
||||
|
|
|
|||
|
|
@ -177,7 +177,9 @@ Slowest phase: reconciliation (2m 05s, 48% of wall time). 2 work units (1 failed
|
|||
|
||||
Work units run serially by default (`ingest.workUnits.maxConcurrency` is `1`);
|
||||
raise it in `ktx.yaml` if the profile shows the run is bound by serialized
|
||||
work-unit agent loops.
|
||||
work-unit agent loops. If the provider reports an LLM rate limit, **ktx** shows
|
||||
a transient wait message and temporarily reduces effective work-unit concurrency
|
||||
according to `ingest.rateLimit`.
|
||||
|
||||
## Common errors
|
||||
|
||||
|
|
|
|||
|
|
@ -68,3 +68,4 @@ hosts and origins for browser clients.
|
|||
| No **ktx** project found | Current directory has no `ktx.yaml` and `KTX_PROJECT_DIR` is unset | Run from a **ktx** project or pass `--project-dir <path>` |
|
||||
| Non-loopback host rejected | The server needs token auth before binding beyond localhost | Pass `--token <token>` or set `KTX_MCP_TOKEN` |
|
||||
| Client cannot connect | Host, port, token, allowed host, or allowed origin does not match the client | Check `ktx mcp status`, then restart with explicit `--host`, `--port`, `--allowed-host`, and `--allowed-origin` values |
|
||||
| A Python-backed tool reports a runtime install failure | A tool that needs the managed Python runtime (metric compute, query-history SQL analysis) ran on a host that cannot reach `github.com` to download the pinned `uv` and Python | The server still starts and serves catalog and search tools. Restore network access and retry, or pre-build the runtime where network is available: `ktx admin runtime install --yes` |
|
||||
|
|
|
|||
|
|
@ -29,6 +29,7 @@ below.
|
|||
| `--agents` | Install agent configuration and rules only | `false` |
|
||||
| `--target <target>` | Agent target: `claude-code`, `claude-desktop`, `codex`, `cursor`, `opencode`, or `universal` | - |
|
||||
| `--global` | Install agent integration into the global target scope for `claude-code` or `codex` | `false` |
|
||||
| `--install-dir <path>` | Directory to install project-scoped agent config into. Defaults to the ktx project directory; resolved against the current directory and created if missing. Use it to install `.claude/`, `.mcp.json`, and rules where you open your agent (e.g. `--install-dir .`). Mutually exclusive with `--global` and `--local` | ktx project dir |
|
||||
| `--yes` | Accept project creation and runtime install defaults where setup asks for confirmation | `false` |
|
||||
| `--no-input` | Disable interactive terminal input | - |
|
||||
|
||||
|
|
@ -54,7 +55,6 @@ prompts.
|
|||
| `--llm-backend <backend>` | LLM backend: `anthropic`, `vertex`, `claude-code`, or `codex` |
|
||||
| `--llm-backend claude-code` | Use the local Claude Code session for **ktx** LLM calls |
|
||||
| `--llm-backend codex` | Use local Codex authentication for **ktx** LLM calls |
|
||||
| `--llm-model <model>` | LLM model ID or backend model alias to validate and save |
|
||||
| `--anthropic-api-key-env <name>` | Environment variable containing the Anthropic API key |
|
||||
| `--anthropic-api-key-file <path>` | File containing the Anthropic API key |
|
||||
| `--vertex-project <project>` | Vertex AI project ID, `env:NAME`, or `file:/path` reference |
|
||||
|
|
@ -64,13 +64,16 @@ prompts.
|
|||
Choose only one Anthropic credential source. Anthropic credential flags are only
|
||||
valid with the Anthropic backend; Vertex flags are only valid with the Vertex
|
||||
backend. The `claude-code` and `codex` backends use local authentication instead
|
||||
of Anthropic API key or Vertex flags. For Claude Code, `--llm-model` accepts
|
||||
`sonnet`, `opus`, `haiku`, or a full Claude model ID. For Codex, `--llm-model`
|
||||
accepts `codex`, `default`, or a `gpt-*` / `codex-*` model ID such as
|
||||
`gpt-5.5`; any other value is rejected before the auth probe. Run `codex` to
|
||||
see the models available to your login, and pick a `gpt-*` / `codex-*` id from
|
||||
that list. Note that `*-codex` API-billing model IDs (for example
|
||||
`gpt-5.3-codex`) are not available to ChatGPT-subscription logins.
|
||||
of Anthropic API key or Vertex flags. After you choose a backend, `ktx setup`
|
||||
writes that backend's per-role model preset to `ktx.yaml`. To change a model,
|
||||
edit the matching `llm.models.<role>` value in `ktx.yaml`.
|
||||
|
||||
With `--no-input`, `ktx setup` does not assume a default LLM provider, because
|
||||
every backend needs credentials only you can supply. Pass `--llm-backend`
|
||||
explicitly. Note that `--target` selects the agent integration, not the LLM
|
||||
provider: `ktx setup --target claude-code --no-input` still needs
|
||||
`--llm-backend claude-code` to use your Claude subscription for **ktx** LLM
|
||||
calls.
|
||||
|
||||
### Embeddings
|
||||
|
||||
|
|
@ -123,6 +126,14 @@ incomplete.
|
|||
MySQL, and SQL Server; `schema_names` for Snowflake; `dataset_ids` for
|
||||
BigQuery; and `databases` for ClickHouse.
|
||||
|
||||
With `--no-input`, scope for a scope-bearing driver (PostgreSQL, MySQL,
|
||||
ClickHouse, SQL Server, BigQuery, Snowflake) must come from `--database-schema`
|
||||
or from existing connection config in `ktx.yaml` (for example
|
||||
`connections.<id>.dataset_ids`). When neither is set, the database step fails
|
||||
fast and prints the missing scope flag and config key — non-interactive setup
|
||||
never auto-discovers and scans every schema. SQLite has no scope and is
|
||||
unaffected.
|
||||
|
||||
### Query History
|
||||
|
||||
| Flag | Description |
|
||||
|
|
@ -198,14 +209,13 @@ ktx setup
|
|||
# Run setup for a specific project directory
|
||||
ktx setup --project-dir ./analytics
|
||||
|
||||
# Use Claude Code with Opus for ktx LLM calls
|
||||
# Use Claude Code for ktx LLM calls
|
||||
ktx setup \
|
||||
--project-dir ./analytics \
|
||||
--llm-backend claude-code \
|
||||
--llm-model opus
|
||||
--llm-backend claude-code
|
||||
|
||||
# Configure ktx to use local Codex authentication for LLM work
|
||||
ktx setup --llm-backend codex --llm-model gpt-5.5 --no-input
|
||||
ktx setup --llm-backend codex --no-input
|
||||
```
|
||||
|
||||
When you choose `--llm-backend codex`, setup prints a warning if the public
|
||||
|
|
@ -282,6 +292,7 @@ Use `ktx status` for repeatable readiness checks after setup exits.
|
|||
|-------|-------|----------|
|
||||
| Setup resumes an unexpected project | `KTX_PROJECT_DIR` or nearest `ktx.yaml` points to another directory | Pass `--project-dir <path>` explicitly |
|
||||
| Setup cannot run in CI | Required values are missing and `--no-input` disables prompts | Provide the relevant automation flags or create a fixture `ktx.yaml` |
|
||||
| `Missing LLM backend: pass --llm-backend …` | `--no-input` setup ran without an LLM backend; `--target` does not select one | Pass `--llm-backend claude-code`, `codex`, `anthropic`, or `vertex` (with that backend's credential flags) |
|
||||
| Provider health check fails | Provider key, model id, Vertex project, or Vertex location is invalid | Fix the `env:` or `file:` reference and rerun setup |
|
||||
| Python runtime is missing | The selected setup needs runtime-backed agent, query-history, Looker, or local embedding features | Accept the interactive prompt, rerun with `--yes`, or run the suggested `ktx admin runtime install` command |
|
||||
| `--enable-query-history` is rejected | The selected database driver does not support query history | Use Postgres, BigQuery, or Snowflake, or rerun without query-history flags |
|
||||
|
|
|
|||
|
|
@ -94,6 +94,6 @@ stats, and are always shown (they do not require external communication).
|
|||
|-------|-------|----------|
|
||||
| No **ktx** project found | Current directory has no `ktx.yaml` and `KTX_PROJECT_DIR` is unset | `ktx status` runs setup checks; run from a **ktx** project or set `KTX_PROJECT_DIR` for project checks |
|
||||
| Project config check fails | The project directory is missing or has an invalid `ktx.yaml` | Run `ktx setup` to resume setup |
|
||||
| Schema validation fails | `ktx.yaml` does not match the current config schema | Run `ktx status --validate --json` for structured issue details, then edit `ktx.yaml` or rerun `ktx setup` |
|
||||
| Schema validation fails | A field **ktx** recognizes has an invalid value. Unrecognized keys are reported as non-blocking warnings (exit `0`), not failures | Run `ktx status --validate --json` for structured issue details, then edit `ktx.yaml` or rerun `ktx setup` |
|
||||
| Semantic search check warns | Embeddings are not configured or the provider probe failed | Run `ktx setup` or inspect the check's `fix` field in JSON output |
|
||||
| Query history check warns | A database has query history enabled but the warehouse prerequisites are missing | Fix the warehouse extension, grants, or history access, then rerun `ktx status` |
|
||||
|
|
|
|||
|
|
@ -74,6 +74,56 @@ The public context-build entrypoint is `ktx ingest [connectionId]` or
|
|||
| `-v`, `--version` | Show the CLI package name and version. |
|
||||
| `-h`, `--help` | Show help for the current command. |
|
||||
|
||||
## Update notices
|
||||
|
||||
> **Note:** The update notifier writes only to stderr and keeps command stdout
|
||||
> unchanged.
|
||||
|
||||
When a newer package is available on your installed release channel, `ktx`
|
||||
prints a short notice after the command finishes:
|
||||
|
||||
```text
|
||||
↑ Update available: ktx 0.9.0 → 0.10.0
|
||||
npm i -g @kaelio/ktx
|
||||
```
|
||||
|
||||
Stable installs compare against the npm `latest` dist-tag.
|
||||
Release-candidate installs compare against the `next` dist-tag and show:
|
||||
|
||||
```text
|
||||
npm i -g @kaelio/ktx@next
|
||||
```
|
||||
|
||||
The check is skipped for JSON output, CI, non-TTY stdout, and hidden completion
|
||||
commands. To opt out explicitly, set any of these environment variables:
|
||||
|
||||
```bash
|
||||
KTX_NO_UPDATE_CHECK=1
|
||||
NO_UPDATE_NOTIFIER=1
|
||||
DO_NOT_TRACK=1
|
||||
```
|
||||
|
||||
The `ktx` CLI prints one npm command because globally installed binaries don't
|
||||
expose a reliable runtime package-manager signal. If you prefer another global
|
||||
package manager, use the equivalent command:
|
||||
|
||||
```bash
|
||||
pnpm add -g @kaelio/ktx
|
||||
yarn global add @kaelio/ktx
|
||||
```
|
||||
|
||||
## Build-view star prompt
|
||||
|
||||
During an interactive context build, `ktx setup` and `ktx ingest` can show a dim
|
||||
GitHub star reminder above the `Ctrl+C to stop` hint. **ktx** skips this prompt
|
||||
for CI, non-TTY output, and `DO_NOT_TRACK=1`.
|
||||
|
||||
To suppress only this prompt while keeping other notices enabled, set:
|
||||
|
||||
```bash
|
||||
KTX_NO_STAR=1
|
||||
```
|
||||
|
||||
## Project resolution
|
||||
|
||||
Most commands are project-aware. Pass `--project-dir <path>` when scripting or
|
||||
|
|
|
|||
111
docs-site/content/docs/community/ai-resources.mdx
Normal file
111
docs-site/content/docs/community/ai-resources.mdx
Normal file
|
|
@ -0,0 +1,111 @@
|
|||
---
|
||||
title: AI Resources
|
||||
description: How coding agents read, cite, and act on the ktx docs - Markdown endpoints, a task router, and copy-paste prompts.
|
||||
---
|
||||
|
||||
This page is for coding assistants that read or cite the **ktx** docs. It covers
|
||||
the machine-readable endpoints, a task router for common questions, and
|
||||
copy-paste prompts. It is scoped to documentation lookup and safe command
|
||||
discovery - to wire **ktx** into an agent client, see
|
||||
[Agent Clients](/docs/integrations/agent-clients).
|
||||
|
||||
## Markdown endpoints
|
||||
|
||||
**ktx** docs are available as plain Markdown so assistants never have to parse
|
||||
the rendered HTML site.
|
||||
|
||||
- [`/llms.txt`](/llms.txt) - a curated index of high-value pages and agent entry
|
||||
points. **Start here.**
|
||||
- [`/llms-full.txt`](/llms-full.txt) - the entire docs corpus in one response.
|
||||
Use only when a task needs broad context across many pages.
|
||||
- **Per-page Markdown** - append `.md` to any docs URL:
|
||||
|
||||
```text
|
||||
https://docs.kaelio.com/ktx/docs/getting-started/quickstart.md
|
||||
https://docs.kaelio.com/ktx/docs/cli-reference/ktx-sl.md
|
||||
https://docs.kaelio.com/ktx/docs/guides/building-context.md
|
||||
```
|
||||
|
||||
A request for any docs URL with an `Accept: text/markdown` header returns the
|
||||
same Markdown without the `.md` suffix:
|
||||
|
||||
```bash
|
||||
curl -H "Accept: text/markdown" https://docs.kaelio.com/ktx/docs/getting-started/quickstart
|
||||
```
|
||||
|
||||
Each Markdown response leads with the page title, description, canonical URL, and
|
||||
Markdown URL; frontmatter is stripped; code blocks and tables are preserved; and
|
||||
missing pages return a plain-text `404` instead of falling back to HTML. Rendered
|
||||
pages also expose a **Copy as Markdown** action near the title.
|
||||
|
||||
### Retrieval order
|
||||
|
||||
1. Fetch [`/llms.txt`](/llms.txt).
|
||||
2. Pick one or two relevant per-page `.md` URLs.
|
||||
3. Fetch [`/llms-full.txt`](/llms-full.txt) only when page-level docs are not
|
||||
enough.
|
||||
|
||||
```bash
|
||||
curl https://docs.kaelio.com/ktx/llms.txt
|
||||
curl https://docs.kaelio.com/ktx/docs/getting-started/quickstart.md
|
||||
```
|
||||
|
||||
## Task router
|
||||
|
||||
| User asks the agent to explain... | Read first | Then read |
|
||||
|------------------------------------|------------|-----------|
|
||||
| What **ktx** does | [Introduction](/docs/getting-started/introduction) | [The Context Layer](/docs/concepts/the-context-layer) |
|
||||
| How to start from a checkout | [Quickstart](/docs/getting-started/quickstart) | [ktx setup](/docs/cli-reference/ktx-setup) |
|
||||
| How to check project readiness | [ktx status](/docs/cli-reference/ktx-status) | [Quickstart](/docs/getting-started/quickstart) |
|
||||
| How context gets built | [Building Context](/docs/guides/building-context) | [ktx ingest](/docs/cli-reference/ktx-ingest) |
|
||||
| How semantic YAML works | [Writing Context](/docs/guides/writing-context) | [ktx sl](/docs/cli-reference/ktx-sl) |
|
||||
| How machine-readable CLI output is shaped | [ktx sl](/docs/cli-reference/ktx-sl) | [ktx wiki](/docs/cli-reference/ktx-wiki) |
|
||||
|
||||
## Agent instructions
|
||||
|
||||
Paste this into a project or system prompt when an assistant needs to answer
|
||||
from the **ktx** docs:
|
||||
|
||||
```text
|
||||
When answering ktx docs questions:
|
||||
|
||||
1. Start with https://docs.kaelio.com/ktx/llms.txt.
|
||||
2. Fetch the smallest relevant Markdown page (append .md to its docs URL).
|
||||
3. Prefer the .md route over rendered HTML.
|
||||
4. Use https://docs.kaelio.com/ktx/llms-full.txt only when the task needs broad docs context.
|
||||
5. Quote commands exactly from docs pages.
|
||||
6. If docs and local CLI behavior disagree, say what differs and prefer the verified local CLI output.
|
||||
```
|
||||
|
||||
## Prompts
|
||||
|
||||
Replace project names, connection ids, and business terms with your own values.
|
||||
|
||||
**Install and configure ktx in a project**
|
||||
|
||||
```text
|
||||
Run npx skills add Kaelio/ktx --skill ktx and use the ktx skill to install and configure ktx
|
||||
```
|
||||
|
||||
**Find the right command**
|
||||
|
||||
```text
|
||||
Find the correct ktx command for this task: <task>. Start with /llms.txt, then fetch the smallest relevant CLI reference .md page. Quote the exact command and flags from the docs.
|
||||
```
|
||||
|
||||
**Review semantic changes**
|
||||
|
||||
```text
|
||||
Review the ktx semantic-layer and wiki changes in this branch. Check that measures have clear definitions, joins use valid keys, hidden or internal columns are not exposed to agents, and validation passes. List concrete file and line issues first.
|
||||
```
|
||||
|
||||
## Guardrails
|
||||
|
||||
- Do not invent CLI flags - fetch the relevant CLI reference page.
|
||||
- Do not scrape rendered HTML when a `.md` route exists.
|
||||
- Do not treat `/llms.txt` as complete documentation - use it as an index, then
|
||||
fetch the linked pages.
|
||||
- Do not include credentials or secrets in prompts, URLs, or copied docs
|
||||
snippets.
|
||||
- When docs and local CLI behavior disagree, prefer the local CLI output and
|
||||
mention the mismatch.
|
||||
|
|
@ -1,5 +1,5 @@
|
|||
{
|
||||
"title": "Community",
|
||||
"title": "Community & Resources",
|
||||
"defaultOpen": true,
|
||||
"pages": ["support", "contributing", "telemetry"]
|
||||
"pages": ["support", "contributing", "telemetry", "ai-resources"]
|
||||
}
|
||||
|
|
|
|||
|
|
@ -46,6 +46,33 @@ an operation errors, the detail we record is the error as your tools reported
|
|||
it, which can include identifiers from your setup. If you'd rather send nothing
|
||||
at all, turn telemetry off using any of the options above.
|
||||
|
||||
## Error reports
|
||||
|
||||
When telemetry is enabled, **ktx** sends PostHog Error Tracking `$exception`
|
||||
events for CLI and daemon exceptions. Error reports help group crashes and
|
||||
handled failures into PostHog issues.
|
||||
|
||||
Error reports can include:
|
||||
|
||||
- Stack frames, including function names, local file paths, line numbers, and
|
||||
SDK-provided source context.
|
||||
- Error class names and raw error messages.
|
||||
- Cause chains when the runtime exposes them.
|
||||
- `source`, `handled`, and `fatal` diagnostic fields.
|
||||
- Runtime version, OS, architecture, and CI fields.
|
||||
- The hashed `projectId` when **ktx** knows the project.
|
||||
|
||||
Error reports never intentionally include:
|
||||
|
||||
- Secrets, credentials, API keys, tokens, cookies, signed URLs, or auth headers.
|
||||
- Database URLs, connection strings, DSNs, raw argv, or raw environment values.
|
||||
- SQL text, schema names, table names, or column names as explicit payload
|
||||
properties.
|
||||
- Customer row data.
|
||||
- User prompt text or raw MCP arguments.
|
||||
|
||||
The same opt-out controls listed above disable error reports.
|
||||
|
||||
## Storage and retention
|
||||
|
||||
Telemetry is sent to PostHog, a third-party product-analytics service used by
|
||||
|
|
|
|||
154
docs-site/content/docs/concepts/cross-database-federation.mdx
Normal file
154
docs-site/content/docs/concepts/cross-database-federation.mdx
Normal file
|
|
@ -0,0 +1,154 @@
|
|||
---
|
||||
title: Cross-database federation
|
||||
description: How ktx federates postgres, mysql, and sqlite connections so a single read-only SQL query can join across them without copying data.
|
||||
---
|
||||
|
||||
Cross-database federation lets a single read-only SQL query join tables that
|
||||
live in different databases. **ktx** achieves this by embedding DuckDB and
|
||||
using its `ATTACH` mechanism to connect each member database read-only. The
|
||||
join executes inside DuckDB at query time — live data, no ETL, no copy.
|
||||
|
||||
You run federated queries as raw SQL against the `_ktx_federated` connection
|
||||
(see [Querying the federated connection
|
||||
directly](#querying-the-federated-connection-directly)). Semantic-layer queries
|
||||
(`ktx sl query` / the `sl_query` tool) stay per-connection; pointing one at
|
||||
`_ktx_federated` returns an error telling you to use raw SQL instead.
|
||||
|
||||
Federation activates automatically when a `ktx.yaml` file declares two or more
|
||||
attach-compatible connections. There is nothing to configure and no federation
|
||||
block to add. With zero or one compatible connection the behavior is unchanged.
|
||||
|
||||
## Which connections participate
|
||||
|
||||
The v1 federation engine supports three drivers; every other driver stays standalone:
|
||||
|
||||
| Driver | Participates in federation |
|
||||
|--------|---------------------------|
|
||||
| `postgres` | Yes |
|
||||
| `mysql` | Yes |
|
||||
| `sqlite` | Yes |
|
||||
| `snowflake` | No — standalone connection |
|
||||
| `bigquery` | No — standalone connection |
|
||||
| `clickhouse` | No — standalone connection |
|
||||
| `sqlserver` | No — standalone connection |
|
||||
|
||||
Non-participating connections continue to work exactly as they did. They are
|
||||
queried independently; they do not appear as federation members.
|
||||
|
||||
## How it activates
|
||||
|
||||
**ktx** inspects the connections in `ktx.yaml` at startup. When it finds two or
|
||||
more connections whose driver is `postgres`, `mysql`, or `sqlite`, it
|
||||
instantiates the DuckDB federation engine and attaches each one read-only.
|
||||
There is no `federation:` key, no opt-in flag, and no connection-level setting
|
||||
to enable. The engine is derived entirely from what is already declared.
|
||||
|
||||
A minimal `ktx.yaml` that triggers federation:
|
||||
|
||||
```yaml
|
||||
connections:
|
||||
pg_books:
|
||||
driver: postgres
|
||||
url: "postgres://user:pass@localhost:5432/books" # pragma: allowlist secret
|
||||
sqlite_reviews:
|
||||
driver: sqlite
|
||||
path: ./data/reviews.db
|
||||
```
|
||||
|
||||
Two attach-compatible connections are present, so federation is active.
|
||||
|
||||
## Table naming in federated queries
|
||||
|
||||
Inside a federated query, postgres and mysql tables use a three-part name:
|
||||
`connectionId.schema.table`. SQLite tables, which have no schema layer in
|
||||
DuckDB, use the two-part form `connectionId.table`. In both cases the
|
||||
connection id (the key under `connections:` in `ktx.yaml`) becomes the catalog name inside DuckDB.
|
||||
|
||||
If a connection `id` is not a bare SQL identifier — for example it contains a
|
||||
hyphen, like `books-db` — double-quote it in the query the same way DuckDB
|
||||
quotes any identifier: `"books-db".public.books`. Writing it unquoted
|
||||
(`books-db.public.books`) is a plain SQL syntax error, not a federation limitation.
|
||||
|
||||
For the example above:
|
||||
|
||||
- `pg_books.public.books` — the `books` table in the `public` schema of the
|
||||
postgres connection
|
||||
- `sqlite_reviews.reviews` — the `reviews` table in the sqlite connection
|
||||
|
||||
These fully qualified names are what you write when you query the federated
|
||||
connection with raw SQL (see [Querying the federated connection
|
||||
directly](#querying-the-federated-connection-directly)). A source file's own
|
||||
`table:` field is not prefixed this way — see [Source files keep member-native
|
||||
table refs](#source-files-keep-member-native-table-refs) below.
|
||||
|
||||
## Source names in the federated view
|
||||
|
||||
When you list or search semantic-layer sources under the federated connection,
|
||||
each source's `name` is prefixed with its member connection id — for example
|
||||
`pg_books.books` and `sqlite_reviews.reviews`. The prefix keeps names unique
|
||||
when two members own a table with the same name: a `users` table in each of
|
||||
`pg_app` and `sqlite_app` surfaces as `pg_app.users` and `sqlite_app.users`
|
||||
rather than colliding on a bare `users`.
|
||||
|
||||
## Source files keep member-native table refs
|
||||
|
||||
A source file's physical `table:` field is not prefixed with the connection id.
|
||||
It stays the member-native reference the connector uses on its own —
|
||||
`public.books` for the postgres member, `reviews` for the sqlite member —
|
||||
because the same file backs a per-connection semantic-layer query against that
|
||||
member, which runs on the member's own driver where a `pg_books.` catalog prefix
|
||||
would point at a database that does not exist. The connection-id prefix is a
|
||||
DuckDB catalog name that appears only in raw federated SQL; the member prefix on
|
||||
the source `name` (above) is independent of it.
|
||||
|
||||
## Cross-database joins
|
||||
|
||||
Write a cross-database join as raw SQL against `_ktx_federated` — see
|
||||
[Querying the federated connection
|
||||
directly](#querying-the-federated-connection-directly) below for a runnable
|
||||
example. DuckDB attaches both members and resolves the join live at query time.
|
||||
|
||||
Declaring the join in a source file's `joins:` block is not supported yet. The
|
||||
semantic layer plans each connection on its own, so a `joins:` entry whose `to:`
|
||||
points at a table in another member is not resolved across the federation
|
||||
boundary. Until that lands, express cross-database joins as raw SQL.
|
||||
|
||||
## Querying the federated connection directly
|
||||
|
||||
The federated connection is addressable by its id,
|
||||
`_ktx_federated`, anywhere **ktx** runs read-only SQL. The same id works for the
|
||||
`ktx sql` command and for a data agent calling the `sql_execution` MCP tool, so
|
||||
both surfaces can run a cross-database query without a source file:
|
||||
|
||||
```bash
|
||||
ktx sql -c _ktx_federated \
|
||||
"SELECT b.title, avg(r.rating) AS avg_rating
|
||||
FROM pg_books.public.books b
|
||||
JOIN sqlite_reviews.reviews r ON b.id = r.book_id
|
||||
GROUP BY b.title"
|
||||
```
|
||||
|
||||
Table names follow the rules from
|
||||
[Table naming in federated queries](#table-naming-in-federated-queries):
|
||||
three-part `connectionId.schema.table` for postgres and mysql, two-part
|
||||
`connectionId.table` for sqlite. The `_ktx_federated` id is virtual — it is
|
||||
never written to `ktx.yaml` and only exists when two or more attach-compatible
|
||||
connections are declared. It surfaces in `ktx connection` and in the agent's
|
||||
connection list so the id is discoverable. Querying a single member database
|
||||
directly with its own connection id (`ktx sql -c pg_books ...`) is unchanged.
|
||||
|
||||
## Federated queries are read-only
|
||||
|
||||
DuckDB attaches every member database with read-only access. Federated queries
|
||||
are `SELECT`/`WITH` only. No writes, no DDL, and no mutations reach any member
|
||||
database through the federation engine.
|
||||
|
||||
## Current limitations
|
||||
|
||||
- **Raw SQL joins only.** Cross-database joins are written as raw SQL; declaring
|
||||
them in a source's `joins:` block and automatic discovery of cross-database
|
||||
relationships are not available yet. Intra-database relationship discovery for
|
||||
each member connection is unchanged.
|
||||
- **postgres, mysql, and sqlite only.** Other drivers (snowflake, bigquery,
|
||||
clickhouse, sqlserver) do not participate in federation in this version. They
|
||||
remain usable as standalone connections.
|
||||
|
|
@ -1,5 +1,5 @@
|
|||
{
|
||||
"title": "Concepts",
|
||||
"defaultOpen": true,
|
||||
"pages": ["the-context-layer", "semantic-layer-internals", "wiki-retrieval"]
|
||||
"pages": ["the-context-layer", "semantic-layer-internals", "cross-database-federation", "wiki-retrieval"]
|
||||
}
|
||||
|
|
|
|||
|
|
@ -46,7 +46,7 @@ read, how to think, and where to put the results.
|
|||
</p>
|
||||
<ul className="mt-3 space-y-2 text-sm leading-6 text-fd-foreground">
|
||||
<li><code className="text-[13px] font-semibold">llm</code> - provider, models, prompt cache</li>
|
||||
<li><code className="text-[13px] font-semibold">ingest</code> - adapters, embeddings, work units</li>
|
||||
<li><code className="text-[13px] font-semibold">ingest</code> - connectors, embeddings, work units</li>
|
||||
<li><code className="text-[13px] font-semibold">scan</code> - enrichment, relationships</li>
|
||||
<li><code className="text-[13px] font-semibold">agent</code> - research-agent feature flags</li>
|
||||
</ul>
|
||||
|
|
@ -344,15 +344,14 @@ setup:
|
|||
|
||||
## `storage`
|
||||
|
||||
`storage` controls where **ktx** keeps its own state and search index, and how
|
||||
state changes are committed. Defaults work for a single-user local project.
|
||||
`storage` controls where **ktx** keeps its own state and search index. Defaults
|
||||
work for a single-user local project.
|
||||
|
||||
```yaml
|
||||
storage:
|
||||
state: sqlite # sqlite | postgres
|
||||
search: sqlite-fts5 # sqlite-fts5 | postgres-hybrid
|
||||
git:
|
||||
auto_commit: true
|
||||
author: "ktx <ktx@example.com>"
|
||||
```
|
||||
|
||||
|
|
@ -360,8 +359,7 @@ storage:
|
|||
|-------|------|---------|---------|
|
||||
| `state` | `sqlite` \| `postgres` | `sqlite` | Backend for **ktx** state. `sqlite` uses `.ktx/db.sqlite`; `postgres` expects a configured Postgres connection. |
|
||||
| `search` | `sqlite-fts5` \| `postgres-hybrid` | `sqlite-fts5` | Backend for search indexes. `postgres-hybrid` combines lexical and vector search in Postgres. |
|
||||
| `git.auto_commit` | `boolean` | `true` | When `true`, ktx auto-commits changes to the git-backed state store. |
|
||||
| `git.author` | `string` | `ktx <ktx@example.com>` | Git author identity for auto-commits. Standard `Name <email>` form. |
|
||||
| `git.author` | `string` | `ktx <ktx@example.com>` | Git author identity for commits. Standard `Name <email>` form. |
|
||||
|
||||
## `llm`
|
||||
|
||||
|
|
@ -377,6 +375,10 @@ llm:
|
|||
models:
|
||||
default: claude-sonnet-4-6
|
||||
triage: claude-haiku-4-5
|
||||
candidateExtraction: claude-sonnet-4-6
|
||||
curator: claude-opus-4-7
|
||||
reconcile: claude-opus-4-7
|
||||
repair: claude-haiku-4-5
|
||||
promptCaching:
|
||||
enabled: true
|
||||
systemTtl: 1h
|
||||
|
|
@ -404,6 +406,11 @@ llm:
|
|||
backend: codex
|
||||
models:
|
||||
default: gpt-5.5
|
||||
triage: gpt-5.5
|
||||
candidateExtraction: gpt-5.5
|
||||
curator: gpt-5.5
|
||||
reconcile: gpt-5.5
|
||||
repair: gpt-5.5
|
||||
```
|
||||
|
||||
### Model roles
|
||||
|
|
@ -433,7 +440,7 @@ provider-specific model identifiers.
|
|||
## `ingest`
|
||||
|
||||
`ingest` controls how **ktx** builds context from your stack. It lists the
|
||||
adapters to run, the embedding provider used when adapters embed documents,
|
||||
connectors to run, the embedding provider used when connectors embed documents,
|
||||
and the concurrency and failure policy for work units.
|
||||
|
||||
```yaml
|
||||
|
|
@ -452,14 +459,24 @@ ingest:
|
|||
stepBudget: 40
|
||||
maxConcurrency: 2
|
||||
failureMode: continue
|
||||
rateLimit:
|
||||
enabled: true
|
||||
throttleThreshold: 0.8
|
||||
minConcurrencyUnderPressure: 1
|
||||
maxWaitMs: 600000
|
||||
retry:
|
||||
maxAttempts: 6
|
||||
baseDelayMs: 1000
|
||||
maxDelayMs: 60000
|
||||
jitter: true
|
||||
```
|
||||
|
||||
### Adapters
|
||||
### Connectors
|
||||
|
||||
`adapters` is a list of adapter IDs that should run. Each ID matches a
|
||||
`adapters` is a list of connector IDs that should run. Each ID matches a
|
||||
connector that **ktx** ships locally:
|
||||
|
||||
| Adapter ID | What it ingests |
|
||||
| Connector ID | What it ingests |
|
||||
|------------|-----------------|
|
||||
| `live-database` | Live warehouse introspection (schemas, tables, columns, samples). |
|
||||
| `historic-sql` | Query history from Postgres `pg_stat_statements`, BigQuery `INFORMATION_SCHEMA.JOBS`, or Snowflake query history. |
|
||||
|
|
@ -469,7 +486,7 @@ connector that **ktx** ships locally:
|
|||
| `looker` | Looker dashboards and looks via the API. |
|
||||
| `metabase` | Metabase cards, dashboards, and database mappings. |
|
||||
| `notion` | Notion pages and databases for wiki context. |
|
||||
| `fake` | Test/demo adapter. Useful in fixtures. |
|
||||
| `fake` | Test/demo connector. Useful in fixtures. |
|
||||
|
||||
### Embeddings
|
||||
|
||||
|
|
@ -498,6 +515,24 @@ handles failures.
|
|||
| `workUnits.maxConcurrency` | `int > 0` | `1` | How many work units run in parallel. |
|
||||
| `workUnits.failureMode` | `abort` \| `continue` | `continue` | `abort` stops the whole ingest run on the first failure; `continue` records it and keeps going. |
|
||||
|
||||
### Rate limits
|
||||
|
||||
`rateLimit` controls provider-neutral pacing for LLM calls during ingest. When a
|
||||
provider reports a subscription window, retry-after delay, or HTTP 429,
|
||||
**ktx** pauses new work-unit model calls, shows a transient wait in the CLI,
|
||||
and reduces work-unit concurrency while the provider is under pressure.
|
||||
|
||||
| Field | Type | Default | Purpose |
|
||||
|-------|------|---------|---------|
|
||||
| `rateLimit.enabled` | `boolean` | `true` | Master switch for ingest LLM rate-limit pacing and visible waits. |
|
||||
| `rateLimit.throttleThreshold` | `number between 0 and 1` | `0.8` | Fraction of a known provider window at which **ktx** starts reducing concurrency. |
|
||||
| `rateLimit.minConcurrencyUnderPressure` | `int > 0` | `1` | Effective work-unit concurrency while a provider is under rate-limit pressure. |
|
||||
| `rateLimit.maxWaitMs` | `int > 0` | unset | Caps how long a single provider-reset wait can last. This bounds each wait, not the whole run: after a capped wait elapses **ktx** retries and may pause again. Omit to wait until the provider's reset time. |
|
||||
| `rateLimit.retry.maxAttempts` | `int > 0` | `6` | Maximum attempts for a single rate-limited LLM call before the failure surfaces (counts the first try). Also bounds how far opaque backoff grows for responses without a reset time or retry-after value. |
|
||||
| `rateLimit.retry.baseDelayMs` | `int > 0` | `1000` | Initial opaque retry delay in milliseconds. |
|
||||
| `rateLimit.retry.maxDelayMs` | `int > 0` | `60000` | Maximum opaque retry delay in milliseconds. |
|
||||
| `rateLimit.retry.jitter` | `boolean` | `true` | Add jitter to opaque retry delays. |
|
||||
|
||||
## `scan`
|
||||
|
||||
`scan` configures how schema-level inputs become structured context:
|
||||
|
|
@ -571,19 +606,6 @@ agent:
|
|||
| `run_research.max_iterations` | `int ≥ 0` | `20` | Maximum tool-call iterations per research run. |
|
||||
| `run_research.default_toolset` | `string[]` | `[sl_query, wiki_search, sl_read_source]` | Tool identifiers exposed to the research agent. |
|
||||
|
||||
## `memory`
|
||||
|
||||
`memory` controls the agent memory subsystem.
|
||||
|
||||
```yaml
|
||||
memory:
|
||||
auto_commit: true
|
||||
```
|
||||
|
||||
| Field | Type | Default | Purpose |
|
||||
|-------|------|---------|---------|
|
||||
| `auto_commit` | `boolean` | `true` | When `true`, ktx auto-commits memory updates to the git-backed store. |
|
||||
|
||||
## A full example
|
||||
|
||||
Combining the blocks above:
|
||||
|
|
@ -608,13 +630,17 @@ storage:
|
|||
state: sqlite
|
||||
search: sqlite-fts5
|
||||
git:
|
||||
auto_commit: true
|
||||
author: "ktx <ktx@example.com>"
|
||||
llm:
|
||||
provider:
|
||||
backend: claude-code
|
||||
models:
|
||||
default: sonnet
|
||||
triage: haiku
|
||||
candidateExtraction: sonnet
|
||||
curator: opus
|
||||
reconcile: opus
|
||||
repair: haiku
|
||||
ingest:
|
||||
adapters:
|
||||
- live-database
|
||||
|
|
@ -636,17 +662,25 @@ scan:
|
|||
agent:
|
||||
run_research:
|
||||
enabled: true
|
||||
memory:
|
||||
auto_commit: true
|
||||
```
|
||||
|
||||
## Validating your config
|
||||
|
||||
**ktx** validates `ktx.yaml` strictly: unknown keys at the top level or inside
|
||||
strict blocks cause setup and CLI commands to fail with a precise path
|
||||
(`scan.relationships.acceptThreshhold: Unrecognized key`). Warehouse
|
||||
connections accept extra driver-specific fields, so passthrough values like
|
||||
`historicSql` and `context.queryHistory` are allowed.
|
||||
**ktx** validates `ktx.yaml` when it loads, and treats two kinds of problems
|
||||
differently:
|
||||
|
||||
- **An invalid value on a field ktx recognizes** (for example
|
||||
`llm.provider.backend: nope`) is a hard error. Setup and CLI commands stop and
|
||||
report the exact path so you can fix it.
|
||||
- **An unrecognized key** — one left over from a different **ktx** version, or a
|
||||
typo such as `scan.relationships.acceptThreshhold` — is tolerated, not fatal.
|
||||
**ktx** ignores the key and keeps running, so a misspelled field quietly falls
|
||||
back to its default instead of taking effect. `ktx status` lists each ignored
|
||||
key as a warning (and exits `0`) so you can remove or correct it when
|
||||
convenient.
|
||||
|
||||
Warehouse connections accept extra driver-specific fields, so passthrough values
|
||||
like `historicSql` and `context.queryHistory` are allowed.
|
||||
|
||||
To re-validate without running anything else:
|
||||
|
||||
|
|
|
|||
|
|
@ -4,6 +4,7 @@ description: ktx is an open-source, self-improving context layer for data agents
|
|||
---
|
||||
|
||||
import { ProductMechanics } from "@/components/product-mechanics";
|
||||
import { ProductRuntime } from "@/components/product-runtime";
|
||||
|
||||
<div className="not-prose mb-10">
|
||||
<div>
|
||||
|
|
@ -23,7 +24,7 @@ import { ProductMechanics } from "@/components/product-mechanics";
|
|||
>
|
||||
Make analytics context usable by agents
|
||||
</h1>
|
||||
<p className="mt-4 max-w-2xl text-lg text-fd-muted-foreground" style={{ lineHeight: '1.7' }}>
|
||||
<p className="mt-4 max-w-full text-lg text-fd-muted-foreground" style={{ lineHeight: '1.7' }}>
|
||||
{'ktx is an open-source context layer for data agents. It turns warehouse metadata, BI tool definitions, query history, docs, and approved metric definitions into reviewable files agents can search and execute.'}
|
||||
</p>
|
||||
</div>
|
||||
|
|
@ -59,6 +60,8 @@ serves that context to agents at runtime.
|
|||
|
||||
<ProductMechanics />
|
||||
|
||||
<ProductRuntime />
|
||||
|
||||
## Use it for
|
||||
|
||||
Use **ktx** when agents need more than raw database access. Agents can search wiki
|
||||
|
|
@ -92,8 +95,8 @@ best first step for users; contributor setup lives in the community docs.
|
|||
<Card title="CLI Reference" href="/docs/cli-reference/ktx">
|
||||
Complete flag and subcommand reference for every **ktx** command.
|
||||
</Card>
|
||||
<Card title="Agent Quickstart" href="/docs/ai-resources/agent-quickstart">
|
||||
Machine-readable docs and agent-facing setup notes.
|
||||
<Card title="AI Resources" href="/docs/community/ai-resources">
|
||||
Machine-readable docs, a task router, and copy-paste agent prompts.
|
||||
</Card>
|
||||
</Cards>
|
||||
|
||||
|
|
|
|||
|
|
@ -9,8 +9,8 @@ This guide takes a local analytics project from empty to agent-ready. You'll
|
|||
install the CLI, run one guided setup command, and hand the context to a
|
||||
coding assistant.
|
||||
|
||||
If you're a coding assistant choosing a docs route, start with the
|
||||
[Agent Quickstart](/docs/ai-resources/agent-quickstart) instead.
|
||||
If you're a coding assistant choosing a docs route, start with
|
||||
[AI Resources](/docs/community/ai-resources) instead.
|
||||
|
||||
<div
|
||||
className="not-prose my-8 overflow-hidden rounded-2xl border"
|
||||
|
|
@ -191,6 +191,12 @@ Install the published package globally:
|
|||
npm install -g @kaelio/ktx
|
||||
```
|
||||
|
||||
To upgrade an existing install later, re-run with the `@latest` tag:
|
||||
|
||||
```bash
|
||||
npm install -g @kaelio/ktx@latest
|
||||
```
|
||||
|
||||
**ktx** is open source. If you'd like to hack on it or run from a local checkout,
|
||||
the source lives at [github.com/kaelio/ktx](https://github.com/kaelio/ktx) -
|
||||
see [Contributing](/docs/community/contributing) to get set up.
|
||||
|
|
@ -301,12 +307,12 @@ connection is unreachable or misconfigured the build is blocked up front and
|
|||
**ktx** names the failing connection by id and connector type:
|
||||
|
||||
```text
|
||||
KTX cannot build context: a required connection failed its live test.
|
||||
ktx cannot build context: a required connection failed its live test.
|
||||
|
||||
Failed connections:
|
||||
warehouse (postgres)
|
||||
|
||||
Each connection must be reachable before KTX builds context.
|
||||
Each connection must be reachable before ktx builds context.
|
||||
Run `ktx connection test <id>` to see the error, fix the connection, then retry.
|
||||
```
|
||||
|
||||
|
|
@ -332,6 +338,16 @@ separate `ktx` binary on `PATH`. If the CLI path changes, rerun
|
|||
## What setup writes
|
||||
|
||||
**ktx** writes plain files so people and agents can review changes in git.
|
||||
**ktx** initializes a git repository at the project directory and writes context
|
||||
changes there. If the project directory is nested inside another repository,
|
||||
**ktx** still keeps its own repo and does not commit to the parent repo.
|
||||
|
||||
Because **ktx** owns that repository, it will not adopt one it did not create. If
|
||||
you point setup at a directory that is already a git repository's root - such as
|
||||
an existing application checkout - **ktx** stops and asks you to pick a dedicated
|
||||
directory instead. In the setup wizard choose the **New subfolder** option (for
|
||||
example `ktx-project`), or pass a fresh `--project-dir` when running setup
|
||||
non-interactively.
|
||||
|
||||
| Path | Purpose |
|
||||
|------|---------|
|
||||
|
|
|
|||
|
|
@ -43,7 +43,7 @@ Local-auth backends keep provider credentials out of `ktx.yaml`:
|
|||
|
||||
```bash
|
||||
ktx setup --llm-backend claude-code --no-input
|
||||
ktx setup --llm-backend codex --llm-model gpt-5.5 --no-input
|
||||
ktx setup --llm-backend codex --no-input
|
||||
```
|
||||
|
||||
With `claude-code`, **ktx** agent loops can invoke only the **ktx** MCP tools
|
||||
|
|
|
|||
|
|
@ -30,19 +30,19 @@ llm:
|
|||
default: sonnet
|
||||
triage: haiku
|
||||
candidateExtraction: sonnet
|
||||
curator: sonnet
|
||||
reconcile: sonnet
|
||||
repair: sonnet
|
||||
curator: opus
|
||||
reconcile: opus
|
||||
repair: haiku
|
||||
```
|
||||
|
||||
During setup, choose the backend interactively or pass the model in automation:
|
||||
During setup, choose the backend interactively or pass it in automation:
|
||||
|
||||
```bash
|
||||
ktx setup --llm-backend claude-code --llm-model opus --no-input
|
||||
ktx setup --llm-backend claude-code --no-input
|
||||
```
|
||||
|
||||
For Claude Code, `sonnet`, `opus`, and `haiku` map to **ktx** defaults. Full Claude
|
||||
model IDs are also accepted.
|
||||
Setup writes `sonnet`, `haiku`, and `opus` aliases into `llm.models`. You can
|
||||
edit any role to another alias or a full Claude model ID after setup.
|
||||
|
||||
`claude-code` exposes only **ktx** MCP tools for the current agent loop. SDK init
|
||||
metadata may still list host slash commands, skills, and subagents; **ktx** does not
|
||||
|
|
@ -59,12 +59,17 @@ llm:
|
|||
backend: codex
|
||||
models:
|
||||
default: gpt-5.5
|
||||
triage: gpt-5.5
|
||||
candidateExtraction: gpt-5.5
|
||||
curator: gpt-5.5
|
||||
reconcile: gpt-5.5
|
||||
repair: gpt-5.5
|
||||
```
|
||||
|
||||
Configure it non-interactively:
|
||||
|
||||
```bash
|
||||
ktx setup --llm-backend codex --llm-model gpt-5.5 --no-input
|
||||
ktx setup --llm-backend codex --no-input
|
||||
```
|
||||
|
||||
This is separate from Codex agent-client setup. `ktx setup --agents --target
|
||||
|
|
|
|||
|
|
@ -61,11 +61,14 @@ committing the file.
|
|||
|
||||
## A typical review session
|
||||
|
||||
The loop above describes the shape. In practice, one review session looks like
|
||||
this:
|
||||
The loop above describes the shape. Run these commands from the **ktx** project
|
||||
directory. **ktx** keeps that directory as its own git repository, even when the
|
||||
directory lives inside another repository, so reviewing context changes never
|
||||
requires committing to a parent application repo.
|
||||
|
||||
```bash
|
||||
# 1. Run ingest on a branch
|
||||
cd /path/to/ktx-project
|
||||
git checkout -b ingest/2026-05-21
|
||||
ktx ingest --all
|
||||
|
||||
|
|
|
|||
|
|
@ -44,12 +44,17 @@ Use this order for most context changes:
|
|||
Semantic sources are YAML files for queryable tables or custom SQL. They define
|
||||
agent-facing measures, dimensions, segments, joins, and grain.
|
||||
|
||||
Semantic source files live at:
|
||||
Semantic source files live under:
|
||||
|
||||
```text
|
||||
semantic-layer/<connection-id>/<source-name>.yaml
|
||||
semantic-layer/<connection-id>/
|
||||
```
|
||||
|
||||
The file's `name:` field is the source's identity — it carries the warehouse
|
||||
identifier verbatim, including case. The filename is a derived label: simple
|
||||
lowercase names get `<source-name>.yaml`, anything else gets a slugged
|
||||
filename. Renaming a file does not rename the source.
|
||||
|
||||
### Minimal source
|
||||
|
||||
```yaml
|
||||
|
|
@ -152,7 +157,7 @@ joins:
|
|||
|
||||
| Field | Required | Description |
|
||||
|-------|----------|-------------|
|
||||
| `name` | Yes | Source identifier. Use lowercase words and underscores. |
|
||||
| `name` | Yes | Source identity (not the filename). When overlaying an ingested table, match the manifest identifier verbatim, including case (e.g. `SIGNED_UP`); for a new standalone source, lowercase words and underscores are recommended. |
|
||||
| `descriptions` | No | Description map keyed by source, such as `user`, `dbt`, or `ai`. |
|
||||
| `table` or `sql` | Yes | Database table or custom SQL expression. Use exactly one. |
|
||||
| `grain` | Yes | Columns that uniquely identify a row at the source grain. |
|
||||
|
|
|
|||
|
|
@ -68,19 +68,30 @@ If you choose an install mode, it then asks which targets to install:
|
|||
└
|
||||
```
|
||||
|
||||
When every selected target supports both project and global setup, the command
|
||||
also asks where to install supported agent config:
|
||||
When at least one selected target supports project-scoped setup, the command
|
||||
asks where to install agent config:
|
||||
|
||||
```txt
|
||||
◆ Where should ktx install supported agent config?
|
||||
◆ Where should ktx install agent config?
|
||||
│
|
||||
│ ktx project: /path/to/your/ktx-project
|
||||
│
|
||||
│ ○ Project scope (ktx project directory)
|
||||
│ ○ ktx project directory /path/to/your/ktx-project
|
||||
│ ○ Current directory /path/to/where/you/ran/ktx
|
||||
│ ○ Custom directory… (enter a path)
|
||||
│ ○ Global scope (user config)
|
||||
└
|
||||
```
|
||||
|
||||
The first three choices write project-scoped files (`.claude/`, `.mcp.json`,
|
||||
`.cursor/`, skills, and rules) into the chosen directory while still pointing
|
||||
them at this ktx project. Use **Current directory** or **Custom directory…**
|
||||
when you open your coding agent from somewhere other than the ktx project
|
||||
directory. **Current directory** is hidden when it is already the ktx project
|
||||
directory, and **Global scope** appears only when every selected target
|
||||
supports global setup. Non-interactive runs pass `--install-dir <path>` (for
|
||||
example `--install-dir .`) for the same result.
|
||||
|
||||
## Generated files
|
||||
|
||||
**ktx** writes MCP client configuration and analytics guidance by default. It writes
|
||||
|
|
|
|||
|
|
@ -1,6 +1,6 @@
|
|||
---
|
||||
title: Context Sources
|
||||
description: Ingest semantic context from dbt, MetricFlow, LookML, Metabase, Looker, and Notion.
|
||||
description: Ingest semantic context from dbt, MetricFlow, LookML, Metabase, Looker, Notion, and Google Drive.
|
||||
---
|
||||
|
||||
Context sources feed your existing analytics tooling into **ktx**. During ingestion, **ktx** extracts metadata from each source and uses a reconciliation agent to merge it with your existing semantic layer and knowledge base, preserving accepted edits rather than overwriting them.
|
||||
|
|
@ -27,7 +27,7 @@ LookML uses top-level `repoUrl`, and MetricFlow uses nested
|
|||
|
||||
| Field | Required | Description |
|
||||
|-------|----------|-------------|
|
||||
| `driver` | Yes | Source connector: `dbt`, `metricflow`, `lookml`, `metabase`, `looker`, or `notion` |
|
||||
| `driver` | Yes | Source connector: `dbt`, `metricflow`, `lookml`, `metabase`, `looker`, `notion`, or `gdrive` |
|
||||
| `source_dir` | For local file sources | Absolute or project-relative source directory |
|
||||
| `repo_url` | For Git-hosted dbt sources | Git repository URL |
|
||||
| `repoUrl` | For Git-hosted LookML sources | Git repository URL |
|
||||
|
|
@ -38,15 +38,16 @@ LookML uses top-level `repoUrl`, and MetricFlow uses nested
|
|||
|
||||
## dbt
|
||||
|
||||
Ingests schema definitions, model descriptions, column metadata, and test coverage from a dbt project.
|
||||
Ingests schema definitions, model descriptions, column metadata, and column test definitions from a dbt project.
|
||||
|
||||
### What it provides
|
||||
|
||||
- Model and source definitions from `schema.yml` files
|
||||
- Column descriptions and types
|
||||
- Test coverage signals
|
||||
- Semantic model references (if using dbt semantic layer)
|
||||
- Data lineage between models
|
||||
- Column names, descriptions, and data types
|
||||
- Column tests, mapped to semantic facts — `not_null` / `unique` become column constraints, `accepted_values` becomes enum value lists, and `relationships` becomes join / foreign-key edges
|
||||
- Model and source tags, and source freshness settings
|
||||
|
||||
MetricFlow `semantic_models:` and `metrics:` are ingested through the separate [MetricFlow](#metricflow) source, not the dbt driver.
|
||||
|
||||
### Connection config
|
||||
|
||||
|
|
@ -87,9 +88,9 @@ connections:
|
|||
|
||||
### What gets ingested
|
||||
|
||||
- YAML semantic sources generated from dbt schema files
|
||||
- One work unit per semantic source (for projects with >25 YAML files) or all at once for smaller projects
|
||||
- Column descriptions, tests, and relationships are preserved
|
||||
- **Semantic-layer overlays** (`semantic-layer/*.yaml`): descriptions, constraints, enum values, and joins from the dbt YAML are written onto the semantic source for the matching warehouse table. Overlays land on the warehouse connection that owns the table, which is usually a different connection than the dbt source itself.
|
||||
- **Wiki pages** (`wiki/`): for definitions or relationships that don't map to a confirmed physical table.
|
||||
- **Work units** for parallel processing: one per schema file under `models/` when the project has more than 25 YAML files, otherwise a single combined unit.
|
||||
|
||||
---
|
||||
|
||||
|
|
@ -101,7 +102,7 @@ Ingests MetricFlow semantic models and metric definitions. Useful when your team
|
|||
|
||||
- Semantic model definitions (entities, dimensions, measures)
|
||||
- Cross-model metric definitions
|
||||
- Dimension and entity relationships between models
|
||||
- Entity relationships between models, inferred from matching foreign and primary entities
|
||||
|
||||
### Connection config
|
||||
|
||||
|
|
@ -133,7 +134,7 @@ For a local path:
|
|||
|
||||
### What gets ingested
|
||||
|
||||
- Semantic models with their entities, dimensions, and measures
|
||||
- Semantic models with their entities, dimensions, measures, and the join edges inferred from entity relationships
|
||||
- Metric definitions with their expressions and filters
|
||||
- Work units organized by connected component (metrics + related semantic models grouped together)
|
||||
|
||||
|
|
@ -178,10 +179,10 @@ For a local path:
|
|||
|
||||
### What gets ingested
|
||||
|
||||
- View and model definitions organized by connected component
|
||||
- LookML field types mapped to semantic layer column types
|
||||
- Join definitions and relationship cardinalities
|
||||
- SQL table references for warehouse mapping validation
|
||||
- One work unit per model, plus a unit for orphan views and one per dashboard
|
||||
- Semantic-layer sources per view — overlays for thin `sql_table_name` wrappers, standalone sources for `derived_table` views
|
||||
- Measures, joins (with their Looker `relationship:`), and field types mapped to column types (`yesno` → boolean, date/timestamp → time)
|
||||
- Wiki pages for relationships and descriptions, with warehouse identifiers verified before writing
|
||||
|
||||
### Warehouse mapping
|
||||
|
||||
|
|
@ -192,19 +193,19 @@ Optionally validate that LookML references match your expected Looker connection
|
|||
expectedLookerConnectionName: postgres_connection
|
||||
```
|
||||
|
||||
This validates that LookML model `connection:` declarations match expectations, flagging mismatches during ingestion.
|
||||
This compares each model's `connection:` declaration against the expected name. Mismatched models are flagged, and semantic-layer writes are disabled for them during that ingest while wiki extraction still proceeds.
|
||||
|
||||
---
|
||||
|
||||
## Metabase
|
||||
|
||||
Ingests dashboards, questions, and their underlying SQL queries from a Metabase instance. Maps Metabase databases to your **ktx** warehouse connections.
|
||||
Ingests collections, questions, models, and metrics — with their underlying SQL — from a Metabase instance. Maps Metabase databases to your **ktx** warehouse connections.
|
||||
|
||||
### What it provides
|
||||
|
||||
- Dashboard metadata and organization
|
||||
- Question/query definitions (native SQL and structured queries)
|
||||
- Table and column usage patterns from queries
|
||||
- Collections and their hierarchy, used to organize ingested context
|
||||
- Questions, models, and metrics — resolved SQL for both native and structured (MBQL) queries
|
||||
- Each card's output schema: column types and primary/foreign-key hints
|
||||
- Database-to-warehouse relationship mapping
|
||||
|
||||
### Connection config
|
||||
|
|
@ -233,9 +234,9 @@ Generate an API key in Metabase: **Admin > Settings > Authentication > API Keys*
|
|||
|
||||
### What gets ingested
|
||||
|
||||
- Semantic sources generated from SQL queries in questions
|
||||
- Wiki pages for dashboards (purpose, key metrics, relationships)
|
||||
- Work units per dashboard and per question
|
||||
- Semantic-layer sources generated from each card's resolved SQL and column metadata, written to the mapped warehouse connection
|
||||
- Fallback wiki notes only when a referenced table can't be mapped or an identifier can't be verified
|
||||
- One work unit per Metabase collection; re-syncs reprocess only collections with changed cards
|
||||
|
||||
### Warehouse mapping
|
||||
|
||||
|
|
@ -289,10 +290,10 @@ Generate API credentials in Looker: **Admin > Users > Edit > API Keys**.
|
|||
|
||||
### What gets ingested
|
||||
|
||||
- Semantic sources from explore field definitions
|
||||
- Wiki pages for dashboards (purpose, audience, key metrics)
|
||||
- Triage signals for automated content classification
|
||||
- Work units per explore and per dashboard
|
||||
- Semantic-layer sources from explore fields, written to the mapped warehouse connection (mapped explores only)
|
||||
- Wiki pages capturing reusable metric, segment, and domain knowledge from dashboards and Looks
|
||||
- Usage and recency signals that drive a triage gate, focusing processing on high-value content
|
||||
- Work units per explore, per dashboard, and per Look
|
||||
|
||||
### Warehouse mapping
|
||||
|
||||
|
|
@ -314,10 +315,10 @@ Ingests pages and databases from a Notion workspace as wiki pages. Useful for ca
|
|||
|
||||
### What it provides
|
||||
|
||||
- Wiki pages synthesized from Notion content
|
||||
- Page hierarchy and relationships
|
||||
- Database schemas (when Notion databases describe primary sources)
|
||||
- Semantic clustering for organized ingestion
|
||||
- Notion pages crawled from selected roots or all accessible content
|
||||
- Page bodies and blocks normalized to Markdown
|
||||
- Page hierarchy and cross-page links (child pages, mentions, relations)
|
||||
- Notion databases and their data-source rows as individual pages
|
||||
|
||||
### Connection config
|
||||
|
||||
|
|
@ -356,6 +357,7 @@ Create an integration at [notion.so/my-integrations](https://www.notion.so/my-in
|
|||
| `crawl_mode` | `all_accessible` or `selected_roots` | - |
|
||||
| `root_page_ids` | Page IDs to crawl from (for `selected_roots`) | `[]` |
|
||||
| `root_database_ids` | Database IDs to include | `[]` |
|
||||
| `root_data_source_ids` | Data-source IDs to include (for `selected_roots`) | `[]` |
|
||||
| `max_pages_per_run` | Pages processed per sync | `1000` |
|
||||
| `max_knowledge_creates_per_run` | New pages created per sync | `25` |
|
||||
| `max_knowledge_updates_per_run` | Pages updated per sync | `20` |
|
||||
|
|
@ -363,17 +365,83 @@ Create an integration at [notion.so/my-integrations](https://www.notion.so/my-in
|
|||
### What gets ingested
|
||||
|
||||
- Wiki pages synthesized from Notion content (not raw copies)
|
||||
- Domain context extracted and organized by topic
|
||||
- Triage signals for classifying page relevance
|
||||
- Work units clustered by semantic similarity for efficient processing
|
||||
- Semantic-layer sources when a page defines a reusable dataset or metric mapped to a confirmed non-Notion target; otherwise the fact stays wiki-only
|
||||
- Page-relevance triage that skips transient content (task lists, status updates, date-titled snapshots)
|
||||
- Work units clustered by embedding similarity for efficient synthesis
|
||||
|
||||
### Notes
|
||||
|
||||
- Notion is knowledge-only - it does not produce semantic layer sources
|
||||
- Notion is wiki-first: it writes durable wiki pages by default and only emits semantic-layer sources for content mapped to a confirmed non-Notion target; unmapped facts stay wiki-only
|
||||
- Rate limits apply; large workspaces may require multiple ingestion runs
|
||||
- Incremental sync cursors are stored in `.ktx/db.sqlite`; don't add
|
||||
`last_successful_cursor` to `ktx.yaml`
|
||||
|
||||
---
|
||||
|
||||
## Google Drive
|
||||
|
||||
Ingests Google Docs from a shared Google Drive folder as wiki-ready knowledge content. This v1 implementation is knowledge-only and ingests only native Google Docs files; other file types in the folder are skipped.
|
||||
|
||||
### What it provides
|
||||
|
||||
- Wiki pages synthesized from Google Docs content
|
||||
- Folder-scoped knowledge ingestion from a specific Drive folder
|
||||
- Markdown normalization for headings, lists, paragraphs, links, common inline formatting, and Google Docs tables
|
||||
|
||||
### Connection config
|
||||
|
||||
```yaml title="ktx.yaml"
|
||||
connections:
|
||||
company-docs:
|
||||
driver: gdrive
|
||||
service_account_key_ref: file:/absolute/path/to/google-service-account.json
|
||||
folder_id: your-google-drive-folder-id
|
||||
recursive: false
|
||||
```
|
||||
|
||||
### Authentication
|
||||
|
||||
| Method | Config |
|
||||
|--------|--------|
|
||||
| Service account JSON key file | `service_account_key_ref: file:/absolute/path/to/key.json` |
|
||||
|
||||
### Google Cloud setup
|
||||
|
||||
1. Create a Google Cloud project.
|
||||
2. Enable the Google Drive API.
|
||||
3. Enable the Google Docs API.
|
||||
4. Create a service account.
|
||||
5. Download the service account JSON key.
|
||||
6. Share the target Drive folder with the service account email.
|
||||
7. Reference the key in `ktx.yaml` with `service_account_key_ref`.
|
||||
|
||||
### Required scopes
|
||||
|
||||
- `https://www.googleapis.com/auth/drive.readonly`
|
||||
- `https://www.googleapis.com/auth/documents.readonly`
|
||||
|
||||
### Configuration options
|
||||
|
||||
| Field | Description | Default |
|
||||
|-------|-------------|---------|
|
||||
| `service_account_key_ref` | File reference to the service account JSON key | - |
|
||||
| `folder_id` | Google Drive folder ID to ingest | - |
|
||||
| `recursive` | Traverse subfolders under `folder_id` | `false` |
|
||||
|
||||
### What gets ingested
|
||||
|
||||
- Google Docs documents only
|
||||
- Wiki-oriented knowledge content
|
||||
- One work unit per staged Google Doc
|
||||
|
||||
### Notes
|
||||
|
||||
- `gdrive` is knowledge-only in v1; it does not produce semantic layer sources
|
||||
- `ktx setup` supports Google Drive configuration, including the service-account key ref, folder id, and recursive crawl flag
|
||||
- `ktx connection test <connectionId>` supports `gdrive`: it verifies that `folder_id` resolves to a folder the service account can read, then reports the number of Google Docs visible in it. A wrong or unshared `folder_id` fails the test instead of reporting zero docs
|
||||
- Only Google Docs are ingested in v1; other file types (Sheets, Slides, PDFs) in the folder are skipped and recorded in the staged manifest
|
||||
- The service account must be granted access to the target folder explicitly
|
||||
|
||||
## Common errors
|
||||
|
||||
| Error or symptom | Likely cause | Recovery |
|
||||
|
|
|
|||
|
|
@ -8,7 +8,6 @@
|
|||
"integrations",
|
||||
"configuration",
|
||||
"cli-reference",
|
||||
"ai-resources",
|
||||
"community"
|
||||
]
|
||||
}
|
||||
|
|
|
|||
|
|
@ -54,9 +54,7 @@ ktx provides semantic-layer files, warehouse scans, wiki pages, provenance, and
|
|||
|
||||
- Installable setup skill: run \`npx skills add Kaelio/ktx --skill ktx\` from
|
||||
the project you want to configure.
|
||||
${link("/docs/ai-resources/agent-quickstart", "Agent Quickstart", "Task-first route for coding assistants using ktx")}
|
||||
${link("/docs/ai-resources/markdown-access", "Markdown Access", "Fetch ktx docs as llms.txt, llms-full.txt, or per-page Markdown")}
|
||||
${link("/docs/ai-resources/agent-instructions", "Agent Instructions", "Suggested instructions for coding assistants that need to read and cite ktx docs")}
|
||||
${link("/docs/community/ai-resources", "AI Resources", "How coding agents read, cite, and act on the ktx docs")}
|
||||
|
||||
## Start Here
|
||||
|
||||
|
|
@ -67,7 +65,7 @@ ${link("/docs/guides/writing-context", "Writing Context", "Write semantic source
|
|||
## Machine-Readable Documentation
|
||||
|
||||
- [Full documentation](${absoluteUrl("/llms-full.txt")}): All docs pages in one plain-text markdown response
|
||||
- [Markdown access guide](${absoluteUrl("/docs/ai-resources/markdown-access.md")}): How to fetch llms.txt, llms-full.txt, and per-page Markdown
|
||||
- [AI Resources guide](${absoluteUrl("/docs/community/ai-resources.md")}): How agents fetch llms.txt, llms-full.txt, and per-page Markdown
|
||||
- [Quickstart markdown](${absoluteUrl("/docs/getting-started/quickstart.md")}): Human setup walkthrough
|
||||
- [Semantic-layer CLI markdown](${absoluteUrl("/docs/cli-reference/ktx-sl.md")}): Semantic-layer commands and JSON output
|
||||
- [Wiki CLI markdown](${absoluteUrl("/docs/cli-reference/ktx-wiki.md")}): Wiki page commands and JSON output
|
||||
|
|
@ -147,8 +145,8 @@ function absoluteUrl(path: string) {
|
|||
|
||||
function formatCategoryName(category: string) {
|
||||
const labels: Record<string, string> = {
|
||||
"ai-resources": "AI Resources",
|
||||
"cli-reference": "CLI Reference",
|
||||
community: "Community & Resources",
|
||||
};
|
||||
|
||||
if (labels[category]) {
|
||||
|
|
|
|||
|
|
@ -30,7 +30,36 @@ const config = {
|
|||
};
|
||||
},
|
||||
async redirects() {
|
||||
// Alias-host canonicalization MUST come before the generic root/docs
|
||||
// redirects below. Those generic rules have no host guard, so if they ran
|
||||
// first they would inject a "/ktx" basePath into the path on the alias
|
||||
// hosts, which the alias catch-alls would then prepend a second time —
|
||||
// producing https://docs.kaelio.com/ktx/ktx/docs/... Redirects also run
|
||||
// before beforeFiles rewrites, so the ktx.sh catch-all must exclude
|
||||
// /stars* to let the stars dashboard rewrite proxy through.
|
||||
return [
|
||||
{
|
||||
source: "/slack",
|
||||
has: [{ type: "host", value: "ktx.sh" }],
|
||||
destination:
|
||||
"https://join.slack.com/t/ktxcommunity/shared_invite/zt-3y9b44m1x-LVyNNJD5nwaZHq4XS29LMQ",
|
||||
permanent: false,
|
||||
basePath: false,
|
||||
},
|
||||
{
|
||||
source: "/:path*",
|
||||
has: [{ type: "host", value: "docs.ktx.sh" }],
|
||||
destination: "https://docs.kaelio.com/ktx/:path*",
|
||||
permanent: true,
|
||||
basePath: false,
|
||||
},
|
||||
{
|
||||
source: "/:path((?!stars(?:/|$)).*)",
|
||||
has: [{ type: "host", value: "ktx.sh" }],
|
||||
destination: "https://docs.kaelio.com/ktx/:path",
|
||||
permanent: true,
|
||||
basePath: false,
|
||||
},
|
||||
{
|
||||
source: "/",
|
||||
destination: "/ktx/docs/getting-started/introduction",
|
||||
|
|
@ -44,26 +73,30 @@ const config = {
|
|||
basePath: false,
|
||||
},
|
||||
{
|
||||
source: "/:path*",
|
||||
has: [{ type: "host", value: "docs.ktx.sh" }],
|
||||
destination: "https://docs.kaelio.com/ktx/:path*",
|
||||
// AI Resources collapsed from four pages to one and now lives under the
|
||||
// Community & Resources section. Redirect the old top-level URL and the
|
||||
// retired per-page slugs to the new home. Redirects run before the .md
|
||||
// rewrite, so the Markdown variants must be matched first and keep their
|
||||
// .md suffix; otherwise a cached Markdown URL would 308 to the HTML page
|
||||
// and break the agent Markdown contract.
|
||||
source: "/docs/ai-resources.md",
|
||||
destination: "/docs/community/ai-resources.md",
|
||||
permanent: true,
|
||||
basePath: false,
|
||||
},
|
||||
{
|
||||
source: "/slack",
|
||||
has: [{ type: "host", value: "ktx.sh" }],
|
||||
destination:
|
||||
"https://join.slack.com/t/ktxcommunity/shared_invite/zt-3y9b44m1x-LVyNNJD5nwaZHq4XS29LMQ",
|
||||
permanent: false,
|
||||
basePath: false,
|
||||
source: "/docs/ai-resources/:slug([^/]+\\.md)",
|
||||
destination: "/docs/community/ai-resources.md",
|
||||
permanent: true,
|
||||
},
|
||||
{
|
||||
source: "/:path((?!stars(?:/|$)).*)",
|
||||
has: [{ type: "host", value: "ktx.sh" }],
|
||||
destination: "https://docs.kaelio.com/ktx/:path",
|
||||
source: "/docs/ai-resources",
|
||||
destination: "/docs/community/ai-resources",
|
||||
permanent: true,
|
||||
},
|
||||
{
|
||||
source: "/docs/ai-resources/:slug",
|
||||
destination: "/docs/community/ai-resources",
|
||||
permanent: true,
|
||||
basePath: false,
|
||||
},
|
||||
];
|
||||
},
|
||||
|
|
|
|||
|
|
@ -2,6 +2,8 @@ import assert from "node:assert/strict";
|
|||
import { spawn } from "node:child_process";
|
||||
import { once } from "node:events";
|
||||
import { readFile, writeFile } from "node:fs/promises";
|
||||
import http from "node:http";
|
||||
import https from "node:https";
|
||||
import { dirname, join } from "node:path";
|
||||
import { createServer } from "node:net";
|
||||
import { after, before, test } from "node:test";
|
||||
|
|
@ -100,6 +102,37 @@ after(async () => {
|
|||
}
|
||||
});
|
||||
|
||||
// Node's fetch (undici) overwrites the Host header with the connection host,
|
||||
// so the alias-host redirect rules never match. The low-level http(s) client
|
||||
// sends Host verbatim, which is what the alias canonicalization keys off of.
|
||||
/**
 * Send a GET request to the docs site while presenting an arbitrary Host
 * header, using the low-level http(s) client so the header is sent as given.
 *
 * The promise settles as soon as the response headers arrive; the body is
 * drained and discarded.
 *
 * @param {string} hostHeader Value to send in the Host header.
 * @param {string} path Request path, including the leading slash.
 * @returns {Promise<{ status: number | undefined, location: string | undefined }>}
 *   The response status code and its Location header, if any.
 */
function requestWithHost(hostHeader, path) {
  const target = new URL(docsSiteUrl);
  const isHttps = target.protocol === "https:";
  const client = isHttps ? https : http;
  // URL.port is empty when the URL uses the scheme's default port.
  const port = target.port || (isHttps ? "443" : "80");

  return new Promise((resolve, reject) => {
    const request = client.request(
      {
        hostname: target.hostname,
        port,
        path,
        method: "GET",
        headers: { Host: hostHeader },
      },
      (response) => {
        // Without a listener, an 'error' raised while the body drains (for
        // example a connection reset) is thrown as an uncaught exception and
        // would take down the whole test process. Rejecting after resolve()
        // is a no-op, so this only matters as a safety net.
        response.on("error", reject);
        response.resume();
        resolve({
          status: response.statusCode,
          location: response.headers.location,
        });
      },
    );
    request.on("error", reject);
    request.end();
  });
}
|
||||
|
||||
test("/ktx/docs redirects to the docs introduction", async () => {
|
||||
const response = await fetch(`${docsSiteUrl}${docsBasePath}/docs`, {
|
||||
redirect: "manual",
|
||||
|
|
@ -112,6 +145,53 @@ test("/ktx/docs redirects to the docs introduction", async () => {
|
|||
);
|
||||
});
|
||||
|
||||
test("retired AI Resources URLs redirect to the page under Community", async () => {
  const retiredBase = `${docsSiteUrl}${docsBasePath}/docs/ai-resources`;
  const communityPage = `${docsBasePath}/docs/community/ai-resources`;
  const manual = { redirect: "manual" };

  // The former top-level URL.
  const bare = await fetch(retiredBase, manual);
  assert.equal(bare.status, 308);
  assert.equal(bare.headers.get("location"), communityPage);

  // A retired per-page slug.
  const slug = await fetch(`${retiredBase}/agent-quickstart`, manual);
  assert.equal(slug.status, 308);
  assert.equal(slug.headers.get("location"), communityPage);

  // A retired per-page Markdown URL has to keep serving Markdown, so it must
  // point at the new .md route rather than at the HTML page.
  const markdown = await fetch(`${retiredBase}/agent-quickstart.md`, manual);
  assert.equal(markdown.status, 308);
  assert.equal(markdown.headers.get("location"), `${communityPage}.md`);

  // With redirects followed, the final response must be Markdown, not HTML.
  const followed = await fetch(`${retiredBase}/agent-quickstart.md`);
  assert.equal(followed.status, 200);
  assert.match(followed.headers.get("content-type") ?? "", /text\/markdown/);
});
|
||||
|
||||
test("/ redirects into the /ktx docs site", async () => {
|
||||
const response = await fetch(`${docsSiteUrl}/`, {
|
||||
redirect: "manual",
|
||||
|
|
@ -141,3 +221,51 @@ test("/ktx/api/search returns docs search results", async () => {
|
|||
"search should return at least one docs result",
|
||||
);
|
||||
});
|
||||
|
||||
test("ktx.sh canonicalizes to a single /ktx basePath on the docs host", async () => {
  const aliasHost = "ktx.sh";

  // Apex root.
  const { status: rootStatus, location: rootLocation } = await requestWithHost(
    aliasHost,
    "/",
  );
  assert.equal(rootStatus, 308);
  assert.equal(rootLocation, "https://docs.kaelio.com/ktx/");
  assert.ok(
    !rootLocation.includes("/ktx/ktx"),
    "the basePath must not be doubled",
  );

  // A nested docs page keeps its path under the single /ktx prefix.
  const { status: pageStatus, location: pageLocation } = await requestWithHost(
    aliasHost,
    "/docs/getting-started/quickstart",
  );
  assert.equal(pageStatus, 308);
  assert.equal(
    pageLocation,
    "https://docs.kaelio.com/ktx/docs/getting-started/quickstart",
  );
});
|
||||
|
||||
test("docs.ktx.sh canonicalizes to a single /ktx basePath on the docs host", async () => {
  const aliasHost = "docs.ktx.sh";

  // Subdomain root.
  const { status: rootStatus, location: rootLocation } = await requestWithHost(
    aliasHost,
    "/",
  );
  assert.equal(rootStatus, 308);
  assert.equal(rootLocation, "https://docs.kaelio.com/ktx");
  assert.ok(
    !rootLocation.includes("/ktx/ktx"),
    "the basePath must not be doubled",
  );

  // A file at the site root is carried over under /ktx.
  const { status: pageStatus, location: pageLocation } = await requestWithHost(
    aliasHost,
    "/llms.txt",
  );
  assert.equal(pageStatus, 308);
  assert.equal(pageLocation, "https://docs.kaelio.com/ktx/llms.txt");
});
|
||||
|
||||
test("ktx.sh keeps the /slack and /stars exceptions", async () => {
  const aliasHost = "ktx.sh";

  // /slack is a temporary redirect to the Slack invite.
  const slack = await requestWithHost(aliasHost, "/slack");
  assert.equal(slack.status, 307);
  assert.match(slack.location, /^https:\/\/join\.slack\.com\//);

  // /stars is proxied by a beforeFiles rewrite, so the apex catch-all must
  // leave it alone instead of canonicalizing it to the docs host.
  const stars = await requestWithHost(aliasHost, "/stars");
  const starsLocation = stars.location ?? "";
  assert.ok(
    !starsLocation.startsWith("https://docs.kaelio.com"),
    "the stars dashboard must not be redirected to the docs host",
  );
});
|
||||
|
|
|
|||
|
|
@ -85,7 +85,7 @@ test("product mechanics component explains ingestion outputs", async () => {
|
|||
"compile into SQL",
|
||||
'"use client"',
|
||||
"@xyflow/react",
|
||||
"<ReactFlow",
|
||||
"<FlowCanvas",
|
||||
"getSmoothStepPath",
|
||||
"animateMotion",
|
||||
"mechanics-particle",
|
||||
|
|
@ -97,21 +97,21 @@ test("product mechanics component explains ingestion outputs", async () => {
|
|||
);
|
||||
}
|
||||
|
||||
assert.match(
|
||||
component,
|
||||
// The ReactFlow canvas config lives in the shared FlowCanvas wrapper, which
|
||||
// product-mechanics renders. Assert the static read-only behavior there.
|
||||
const flowCanvas = await readDocsFile("components/flow-canvas.tsx");
|
||||
for (const guard of [
|
||||
/nodesDraggable=\{false\}/,
|
||||
"ReactFlow canvas should disable node dragging",
|
||||
);
|
||||
assert.match(
|
||||
component,
|
||||
/panOnDrag=\{false\}/,
|
||||
"ReactFlow canvas should disable panning",
|
||||
);
|
||||
assert.match(
|
||||
component,
|
||||
/nodesConnectable=\{false\}/,
|
||||
/zoomOnScroll=\{false\}/,
|
||||
"ReactFlow canvas should disable scroll zoom",
|
||||
);
|
||||
/elementsSelectable=\{false\}/,
|
||||
]) {
|
||||
assert.match(
|
||||
flowCanvas,
|
||||
guard,
|
||||
`shared FlowCanvas should enforce static read-only behavior: ${guard}`,
|
||||
);
|
||||
}
|
||||
|
||||
assert.doesNotMatch(component, /raw-sources/);
|
||||
assert.doesNotMatch(component, /\.ktx/);
|
||||
|
|
|
|||
74
docs-site/tests/product-runtime-content.test.mjs
Normal file
74
docs-site/tests/product-runtime-content.test.mjs
Normal file
|
|
@ -0,0 +1,74 @@
|
|||
import assert from "node:assert/strict";
|
||||
import { readFile } from "node:fs/promises";
|
||||
import { dirname, join } from "node:path";
|
||||
import { test } from "node:test";
|
||||
import { fileURLToPath } from "node:url";
|
||||
|
||||
const docsSiteDir = join(dirname(fileURLToPath(import.meta.url)), "..");
|
||||
|
||||
/**
 * Read a file from the docs-site directory as UTF-8 text.
 *
 * @param {string} path Path relative to the docs-site directory.
 * @returns {Promise<string>} The file contents.
 */
async function readDocsFile(path) {
  const absolutePath = join(docsSiteDir, path);
  return readFile(absolutePath, "utf8");
}
|
||||
|
||||
test("docs introduction renders the serving phase after ingestion", async () => {
  const introduction = await readDocsFile(
    "content/docs/getting-started/introduction.mdx",
  );

  assert.match(
    introduction,
    /import\s+\{\s*ProductRuntime\s*\}\s+from\s+"@\/components\/product-runtime";/,
  );
  assert.match(introduction, /<ProductRuntime\s*\/>/);

  // Locate each marker with the same whitespace tolerance as the match above,
  // so `<ProductRuntime/>` and `<ProductRuntime />` are treated alike.
  const mechanicsIndex = introduction.search(/<ProductMechanics\s*\/>/);
  const runtimeIndex = introduction.search(/<ProductRuntime\s*\/>/);
  const useCaseIndex = introduction.indexOf("## Use it for");

  // A missing marker yields -1. Without these checks, a page that dropped the
  // ingestion diagram would still satisfy `runtimeIndex > mechanicsIndex`.
  assert.notEqual(
    mechanicsIndex,
    -1,
    "introduction should render the ingestion diagram",
  );
  assert.notEqual(
    runtimeIndex,
    -1,
    "introduction should render the serving diagram",
  );
  assert.notEqual(
    useCaseIndex,
    -1,
    "introduction should contain the use-case section",
  );

  assert.ok(
    runtimeIndex > mechanicsIndex,
    "serving diagram should appear after the ingestion diagram",
  );
  assert.ok(
    runtimeIndex < useCaseIndex,
    "serving diagram should appear before use-case sections",
  );
});
|
||||
|
||||
test("product runtime component explains the serving cycle", async () => {
  const component = await readDocsFile("components/product-runtime.tsx");

  // Copy, labels, and implementation markers the component source must contain.
  const requiredSnippets = [
    "How serving works",
    "Serving flow",
    "From an agent request to a governed answer",
    "Your agent",
    "Claude Code",
    "Cursor",
    "Codex",
    "Search wiki + semantic layer",
    "Return approved metrics",
    "Compile metrics → SQL",
    "Context layer",
    "Database",
    "search + read",
    "read-only",
    "wiki/*.md",
    "semantic-layer/*.yaml",
    '"use client"',
    "@xyflow/react",
    "FlowCanvas",
    "getSmoothStepPath",
    "animateMotion",
    "runtime-particle",
    "buildCyclePath",
  ];

  requiredSnippets.forEach((expectedText) => {
    const present = component.includes(expectedText);
    assert.ok(present, `component should include: ${expectedText}`);
  });

  // The component must not mention raw sources or embed images.
  assert.doesNotMatch(component, /raw-sources/);
  assert.doesNotMatch(component, /<img/);
});
|
||||
|
|
@ -1,4 +1,4 @@
|
|||
# KTX release runbook
|
||||
# ktx release runbook
|
||||
|
||||
This runbook covers the maintainer workflow for publishing `@kaelio/ktx` to
|
||||
npm through GitHub Actions. The workflow uses semantic-release to choose the
|
||||
|
|
@ -36,7 +36,7 @@ Before you publish, confirm these requirements:
|
|||
publish the first stable version as `0.1.0`.
|
||||
|
||||
semantic-release doesn't support choosing an arbitrary first `0.x` stable
|
||||
release. If KTX has no stable tag yet and you need the first stable release to
|
||||
release. If **ktx** has no stable tag yet and you need the first stable release to
|
||||
be `0.1.0`, create and push the baseline tag once before running the live
|
||||
stable workflow:
|
||||
|
||||
|
|
@ -46,7 +46,7 @@ git tag v0.0.0 "${root_commit}"
|
|||
git push origin v0.0.0
|
||||
```
|
||||
|
||||
KTX follows the same versioning schema as the main Kaelio release workflow:
|
||||
**ktx** follows the same versioning schema as the main Kaelio release workflow:
|
||||
breaking-change and `major` commit markers create a minor release, not an
|
||||
automatic major release. A major version requires an intentional manual release
|
||||
path.
|
||||
|
|
|
|||
|
|
@ -17,7 +17,7 @@ The copied project initializes its own Git repository on first use.
|
|||
|
||||
## orbit-relationship-verification
|
||||
|
||||
`orbit-relationship-verification/` is a checked-in KTX project used by
|
||||
`orbit-relationship-verification/` is a checked-in **ktx** project used by
|
||||
`pnpm run relationships:verify-orbit`. It points the `orbit` SQLite connection
|
||||
at the Orbit-style no-declared-constraint relationship fixture and verifies that
|
||||
relationship enrichment writes nine accepted joins without requiring a local
|
||||
|
|
@ -27,7 +27,7 @@ warehouse credential.
|
|||
|
||||
`postgres-historic/` is a manual Docker-backed smoke for Postgres
|
||||
query-history ingest via `pg_stat_statements`. It verifies setup, staged
|
||||
query-history artifacts, KTX daemon batch SQL analysis, bounded pattern
|
||||
query-history artifacts, **ktx** daemon batch SQL analysis, bounded pattern
|
||||
WorkUnit shards, and no-WorkUnit idempotency for unchanged bucketed table
|
||||
inputs and pattern shards.
|
||||
|
||||
|
|
|
|||
|
|
@ -1,6 +1,6 @@
|
|||
# local-warehouse fixture
|
||||
|
||||
This directory is a contributor fixture for KTX CLI smoke tests. It uses the
|
||||
This directory is a contributor fixture for **ktx** CLI smoke tests. It uses the
|
||||
internal fake ingest adapter so tests can run without a live database or
|
||||
external service.
|
||||
|
||||
|
|
|
|||
|
|
@ -5,7 +5,6 @@ storage:
|
|||
state: sqlite
|
||||
search: sqlite-fts5
|
||||
git:
|
||||
auto_commit: true
|
||||
author: "ktx <ktx@example.com>"
|
||||
ingest:
|
||||
adapters:
|
||||
|
|
@ -18,5 +17,3 @@ agent:
|
|||
- sl_query
|
||||
- wiki_search
|
||||
- sl_read_source
|
||||
memory:
|
||||
auto_commit: true
|
||||
|
|
|
|||
|
|
@ -1,11 +1,11 @@
|
|||
# Orbit-style relationship discovery verification
|
||||
|
||||
This KTX project backs the default `relationships:verify-orbit` command. It uses
|
||||
This **ktx** project backs the default `relationships:verify-orbit` command. It uses
|
||||
the checked-in Orbit-style SQLite fixture from the relationship discovery
|
||||
benchmark corpus, with no declared primary keys or foreign keys in the database
|
||||
schema.
|
||||
|
||||
Run from the KTX workspace root:
|
||||
Run from the **ktx** workspace root:
|
||||
|
||||
```bash
|
||||
pnpm run relationships:verify-orbit
|
||||
|
|
|
|||
|
|
@ -6,7 +6,6 @@ storage:
|
|||
state: sqlite
|
||||
search: sqlite-fts5
|
||||
git:
|
||||
auto_commit: true
|
||||
author: "ktx <ktx@example.com>"
|
||||
ingest:
|
||||
adapters: []
|
||||
|
|
|
|||
|
|
@ -14,7 +14,7 @@ generated local project.
|
|||
The managed Python runtime smoke requires `uv` on `PATH`, isolates
|
||||
`KTX_RUNTIME_ROOT`, verifies `ktx admin runtime status`, runs `ktx sl query --yes` to
|
||||
install the core runtime from the bundled wheel, checks `ktx admin runtime status`,
|
||||
starts and reuses the KTX daemon, and stops it.
|
||||
starts and reuses the **ktx** daemon, and stops it.
|
||||
|
||||
The artifact manifest contains the public `@kaelio/ktx` npm tarball and the
|
||||
bundled `kaelio-ktx` runtime wheel. The smoke does not install standalone
|
||||
|
|
|
|||
|
|
@ -17,19 +17,19 @@ unchanged bounded pattern shards do not schedule LLM work.
|
|||
## Prerequisites
|
||||
|
||||
- Docker with Compose v2
|
||||
- Node and pnpm matching the KTX workspace
|
||||
- `uv` on `PATH` so the KTX-managed Python runtime can install the bundled
|
||||
- Node and pnpm matching the **ktx** workspace
|
||||
- `uv` on `PATH` so the **ktx**-managed Python runtime can install the bundled
|
||||
runtime wheel
|
||||
|
||||
## Run
|
||||
|
||||
From the KTX repository root:
|
||||
From the **ktx** repository root:
|
||||
|
||||
```bash
|
||||
examples/postgres-historic/scripts/smoke.sh
|
||||
```
|
||||
|
||||
The smoke creates a temporary KTX project, isolates the managed Python runtime
|
||||
The smoke creates a temporary **ktx** project, isolates the managed Python runtime
|
||||
under the temporary project parent, starts Postgres on `127.0.0.1:55432`, and
|
||||
uses this connection URL:
|
||||
|
||||
|
|
@ -41,7 +41,7 @@ Set `KTX_POSTGRES_HISTORIC_KEEP_DOCKER=1` to leave the container running after
|
|||
the script exits.
|
||||
|
||||
The smoke validates the query-history raw snapshot path without requiring LLM
|
||||
credentials. It uses KTX's local stage-only ingest API after `ktx setup`, so the
|
||||
credentials. It uses **ktx**'s local stage-only ingest API after `ktx setup`, so the
|
||||
deterministic reader, batch SQL parser, stable artifact writer, and diff-based
|
||||
WorkUnit planning are checked independently from curation.
|
||||
|
||||
|
|
@ -124,6 +124,6 @@ table.
|
|||
- Missing grants: confirm `GRANT pg_read_all_stats TO ktx_reader;`.
|
||||
- Empty snapshot: rerun `scripts/generate-workload.sh base` and keep
|
||||
`--query-history-min-executions 2` for the smoke.
|
||||
- SQL-analysis failures: run `pnpm run ktx -- dev runtime status` from the KTX
|
||||
- SQL-analysis failures: run `pnpm run ktx -- dev runtime status` from the **ktx**
|
||||
repository root and confirm `uv`, the bundled Python wheel, and the managed
|
||||
runtime all pass.
|
||||
|
|
|
|||
|
|
@ -1,6 +1,6 @@
|
|||
{
|
||||
"name": "ktx-workspace",
|
||||
"version": "0.9.0",
|
||||
"version": "0.13.1",
|
||||
"description": "Workspace root for ktx packages",
|
||||
"private": true,
|
||||
"type": "module",
|
||||
|
|
@ -69,11 +69,6 @@
|
|||
"typescript": "^6.0.3",
|
||||
"yaml": "^2.9.0"
|
||||
},
|
||||
"pnpm": {
|
||||
"onlyBuiltDependencies": [
|
||||
"better-sqlite3"
|
||||
]
|
||||
},
|
||||
"license": "Apache-2.0",
|
||||
"repository": {
|
||||
"type": "git",
|
||||
|
|
|
|||
|
|
@ -1,7 +1,11 @@
|
|||
{
|
||||
"name": "@kaelio/ktx",
|
||||
"version": "0.9.0",
|
||||
"version": "0.13.1",
|
||||
"description": "Standalone ktx context layer for data agents",
|
||||
"author": {
|
||||
"name": "Kaelio",
|
||||
"url": "https://www.kaelio.com"
|
||||
},
|
||||
"type": "module",
|
||||
"engines": {
|
||||
"node": ">=22.0.0"
|
||||
|
|
@ -47,10 +51,13 @@
|
|||
"@ai-sdk/devtools": "0.0.18",
|
||||
"@ai-sdk/google-vertex": "^4.0.134",
|
||||
"@anthropic-ai/claude-agent-sdk": "0.3.146",
|
||||
"@clack/core": "1.3.1",
|
||||
"@clack/prompts": "1.4.0",
|
||||
"@clickhouse/client": "^1.18.5",
|
||||
"@commander-js/extra-typings": "14.0.0",
|
||||
"@duckdb/node-api": "1.5.3-r.3",
|
||||
"@google-cloud/bigquery": "^8.3.1",
|
||||
"google-auth-library": "10.6.2",
|
||||
"@looker/sdk": "^26.8.0",
|
||||
"@looker/sdk-node": "^26.8.0",
|
||||
"@looker/sdk-rtl": "^21.6.5",
|
||||
|
|
@ -72,6 +79,7 @@
|
|||
"pg": "^8.21.0",
|
||||
"posthog-node": "^5.34.9",
|
||||
"react": "^19.2.6",
|
||||
"semver": "^7.8.1",
|
||||
"simple-git": "3.36.0",
|
||||
"snowflake-sdk": "^2.4.2",
|
||||
"yaml": "^2.9.0",
|
||||
|
|
@ -85,6 +93,7 @@
|
|||
"@types/node": "^25.9.1",
|
||||
"@types/pg": "^8.20.0",
|
||||
"@types/react": "^19.2.15",
|
||||
"@types/semver": "^7.7.1",
|
||||
"@vitest/coverage-v8": "^4.1.7",
|
||||
"ajv": "8.20.0",
|
||||
"ink-testing-library": "^4.0.0",
|
||||
|
|
|
|||
|
|
@ -24,7 +24,7 @@ export function registerAdminCommands(program: Command, context: KtxCliCommandCo
|
|||
|
||||
admin
|
||||
.command('init')
|
||||
.description('Initialize a Git-backed KTX project directory for maintenance scripts')
|
||||
.description('Initialize a Git-backed ktx project directory for maintenance scripts')
|
||||
.argument('[directory]', 'Project directory')
|
||||
.option('--force', 'Rewrite ktx.yaml and scaffold files in an existing project', false)
|
||||
.action(
|
||||
|
|
|
|||
|
|
@ -3,6 +3,30 @@ import type { KtxCliIo } from './cli-runtime.js';
|
|||
|
||||
const ESC = String.fromCharCode(0x1b);
|
||||
|
||||
/** Environment variables consulted when deciding whether to emit ANSI styling. */
export interface CliStyleEnv {
  NO_COLOR?: string;
  TERM?: string;
}

/**
 * Decide whether ANSI escape sequences may be written.
 *
 * Styling is turned off when `NO_COLOR` is set to a non-empty value or when
 * `TERM` is exactly `dumb`.
 *
 * @param env Environment to inspect; defaults to `process.env`.
 * @returns `true` when styling is allowed.
 */
function ansiEnabled(env: CliStyleEnv = process.env): boolean {
  const colorDisabled = Boolean(env.NO_COLOR);
  const dumbTerminal = env.TERM === 'dumb';
  return !(colorDisabled || dumbTerminal);
}
|
||||
|
||||
/**
 * Wrap `text` in an SGR open/close pair, or return it untouched when ANSI
 * styling is disabled for `env`.
 *
 * @param text Text to style.
 * @param open SGR code that switches the style on.
 * @param close SGR code that switches the style off again.
 * @param env Optional environment override passed to the enablement check.
 */
function ansiColor(text: string, open: number, close: number, env?: CliStyleEnv): string {
  return ansiEnabled(env) ? `${ESC}[${open}m${text}${ESC}[${close}m` : text;
}

/** Style `text` with SGR 2, closed by SGR 22, when styling is enabled. */
export function dim(text: string, env?: CliStyleEnv): string {
  const open = 2;
  const close = 22;
  return ansiColor(text, open, close, env);
}

/** Style `text` with SGR 36, closed by SGR 39, when styling is enabled. */
export function cyan(text: string, env?: CliStyleEnv): string {
  const open = 36;
  const close = 39;
  return ansiColor(text, open, close, env);
}
|
||||
|
||||
export interface RailBufferedSource {
|
||||
stdoutText(): string;
|
||||
stderrText(): string;
|
||||
|
|
@ -57,27 +81,39 @@ class KtxCliPromptCancelledError extends Error {
|
|||
}
|
||||
|
||||
export function createClackSpinner(): KtxCliSpinner {
|
||||
return spinner();
|
||||
// clack colors the animated spinner frame magenta by default; styleFrame
|
||||
// (typed in SpinnerOptions, absent from the README) recolors it ktx orange.
|
||||
return spinner({ styleFrame: orange });
|
||||
}
|
||||
|
||||
function magenta(text: string): string {
|
||||
return `${ESC}[35m${text}${ESC}[39m`;
|
||||
// ktx mascot orange (#FF8A4C) via 24-bit truecolor.
|
||||
function orange(text: string): string {
|
||||
if (!ansiEnabled()) {
|
||||
return text;
|
||||
}
|
||||
return `${ESC}[38;2;255;138;76m${text}${ESC}[39m`;
|
||||
}
|
||||
|
||||
function red(text: string): string {
|
||||
return `${ESC}[31m${text}${ESC}[39m`;
|
||||
return ansiColor(text, 31, 39);
|
||||
}
|
||||
|
||||
/**
|
||||
* Stderr-only, non-animated spinner. Use this instead of {@link createCliSpinner}
|
||||
* when the next step reads stdin in raw mode (an Ink TUI or a keypress wait):
|
||||
* the animated clack spinner seizes stdin via `@clack/core`'s `block()` and
|
||||
* leaves it dirty, which the following raw-mode reader misreads as a stray key.
|
||||
*/
|
||||
export function createStaticCliSpinner(io: KtxCliSpinnerIo): KtxCliSpinner {
|
||||
return {
|
||||
start(message) {
|
||||
io.stderr.write(`${magenta('◐')} ${message}\n`);
|
||||
io.stderr.write(`${orange('◐')} ${message}\n`);
|
||||
},
|
||||
message(message) {
|
||||
io.stderr.write(`${magenta('│')} ${message}\n`);
|
||||
io.stderr.write(`${orange('│')} ${message}\n`);
|
||||
},
|
||||
stop(message) {
|
||||
io.stderr.write(`${magenta('◇')} ${message}\n`);
|
||||
io.stderr.write(`${orange('◇')} ${message}\n`);
|
||||
},
|
||||
error(message) {
|
||||
io.stderr.write(`${red('■')} ${message}\n`);
|
||||
|
|
@ -85,6 +121,30 @@ export function createStaticCliSpinner(io: KtxCliSpinnerIo): KtxCliSpinner {
|
|||
};
|
||||
}
|
||||
|
||||
/**
 * Pick the spinner implementation from the output stream: the animated clack
 * spinner when stdout is a TTY, otherwise the static `◐/◇/■` line writer
 * (scripts, CI, piped output) so logs are not cluttered by animation frames.
 */
export function createCliSpinner(io: KtxCliIo): KtxCliSpinner {
  if (io.stdout.isTTY === true) {
    return createClackSpinner();
  }
  return createStaticCliSpinner(io);
}
|
||||
|
||||
/**
 * Run an async task between spinner start and stop.
 *
 * The spinner is started with `text.start`, stopped with `text.success` when
 * the task resolves, and switched to its error state with `text.failure` when
 * anything inside the guarded section throws. The original error is rethrown.
 *
 * @param spinner Spinner to drive.
 * @param text Messages for the start, success, and failure states.
 * @param run Task to execute.
 * @returns The value `run` resolved with.
 */
export async function runWithCliSpinner<T>(
  spinner: KtxCliSpinner,
  text: { start: string; success: string; failure: string },
  run: () => Promise<T>,
): Promise<T> {
  const { start, success, failure } = text;
  spinner.start(start);

  let result: T;
  try {
    result = await run();
    spinner.stop(success);
  } catch (error) {
    spinner.error(failure);
    throw error;
  }
  return result;
}
|
||||
|
||||
export function createClackPromptAdapter(): KtxCliPromptAdapter {
|
||||
return {
|
||||
async confirm(options) {
|
||||
|
|
|
|||
|
|
@ -21,7 +21,7 @@ export function formatClaudeCodePromptCachingWarning(fields: string[]): string |
|
|||
if (fields.length === 0) {
|
||||
return null;
|
||||
}
|
||||
return `claude-code ignores ${fields.join(', ')} because the Claude Agent SDK does not expose KTX prompt-cache TTL, tool, or history markers.`;
|
||||
return `claude-code ignores ${fields.join(', ')} because the Claude Agent SDK does not expose ktx prompt-cache TTL, tool, or history markers.`;
|
||||
}
|
||||
|
||||
export function formatClaudeCodePromptCachingFix(): string {
|
||||
|
|
|
|||
|
|
@ -2,6 +2,7 @@ import { existsSync } from 'node:fs';
|
|||
import { join } from 'node:path';
|
||||
import { Command, type CommandUnknownOpts, InvalidArgumentError } from '@commander-js/extra-typings';
|
||||
import type { KtxCliDeps, KtxCliIo, KtxCliPackageInfo } from './cli-runtime.js';
|
||||
import { SLACK_HELP_FOOTER, writeErrorCommunityHint } from './community-cta.js';
|
||||
import { registerCompletionCommands } from './commands/completion-commands.js';
|
||||
import { registerConnectionCommands } from './commands/connection-commands.js';
|
||||
import { registerIngestCommands } from './commands/ingest-commands.js';
|
||||
|
|
@ -16,6 +17,7 @@ import { renderMissingProjectMessage } from './doctor.js';
|
|||
import { findNearestKtxProjectDir, resolveKtxProjectDir } from './project-resolver.js';
|
||||
import { profileMark, profileSpan } from './startup-profile.js';
|
||||
import type { CommandOutcome } from './telemetry/index.js';
|
||||
import { prepareUpdateCheckNotice, type PrepareUpdateCheckNoticeOptions } from './update-check/update-check.js';
|
||||
|
||||
profileMark('module:cli-program');
|
||||
|
||||
|
|
@ -39,6 +41,8 @@ interface KtxCommanderProgramOptions {
|
|||
runInit: (args: { projectDir: string; force: boolean }, io: KtxCliIo) => Promise<number>;
|
||||
}
|
||||
|
||||
type KtxCliUpdateCheckOptions = Pick<PrepareUpdateCheckNoticeOptions, 'env' | 'fetchDistTags' | 'homeDir' | 'now'>;
|
||||
|
||||
export interface BuildKtxProgramOptions {
|
||||
io: KtxCliIo;
|
||||
deps: KtxCliDeps;
|
||||
|
|
@ -47,6 +51,7 @@ export interface BuildKtxProgramOptions {
|
|||
setExitCode?: (code: number) => void;
|
||||
argv?: string[];
|
||||
setTelemetryModule?: (telemetry: typeof import('./telemetry/index.js')) => void;
|
||||
updateCheck?: KtxCliUpdateCheckOptions;
|
||||
}
|
||||
|
||||
type CommanderExitLike = { exitCode: number; code: string; message: string };
|
||||
|
|
@ -247,13 +252,14 @@ export function resolveCommandProjectDirOverride(command: CommandWithGlobalOptio
|
|||
function createBaseProgram(info: KtxCliPackageInfo, io: KtxCliIo): Command {
|
||||
return new Command()
|
||||
.name('ktx')
|
||||
.description('KTX data agent context layer CLI')
|
||||
.option('--project-dir <path>', 'KTX project directory (default: KTX_PROJECT_DIR, nearest ktx.yaml, or cwd)')
|
||||
.description('ktx data agent context layer CLI')
|
||||
.option('--project-dir <path>', 'ktx project directory (default: KTX_PROJECT_DIR, nearest ktx.yaml, or cwd)')
|
||||
.option('--debug', 'Enable diagnostic logging to stderr')
|
||||
.version(`${info.name} ${info.version}`, '-v, --version', 'Show CLI version')
|
||||
.helpOption('-h, --help', 'Show this help text')
|
||||
.configureHelp({ showGlobalOptions: true })
|
||||
.showHelpAfterError()
|
||||
.addHelpText('after', `\n${SLACK_HELP_FOOTER}`)
|
||||
.exitOverride()
|
||||
.configureOutput({
|
||||
writeOut: (chunk) => io.stdout.write(chunk),
|
||||
|
|
@ -431,23 +437,36 @@ export function collectCommandFlagsPresent(command: CommandUnknownOpts): Record<
|
|||
|
||||
export function buildKtxProgram(options: BuildKtxProgramOptions): Command {
|
||||
const program = createBaseProgram(options.packageInfo, options.io);
|
||||
let pendingUpdateNotice: string | null = null;
|
||||
|
||||
program.hook('preAction', async (_thisCommand, actionCommand) => {
|
||||
// The hidden completion command must stay silent and side-effect free: skip
|
||||
// the telemetry notice, command span, and project checks entirely.
|
||||
// the telemetry notice, command span, project checks, and update checks entirely.
|
||||
if (commandPath(actionCommand as CommandPathNode).includes('__complete')) {
|
||||
return;
|
||||
}
|
||||
const commandNode = actionCommand as CommandPathNode;
|
||||
const updateCheck = await prepareUpdateCheckNotice({
|
||||
io: options.io,
|
||||
env: options.updateCheck?.env,
|
||||
fetchDistTags: options.updateCheck?.fetchDistTags,
|
||||
homeDir: options.updateCheck?.homeDir,
|
||||
installedVersion: options.packageInfo.version,
|
||||
now: options.updateCheck?.now,
|
||||
commandOptions: commandOptions(commandNode),
|
||||
});
|
||||
pendingUpdateNotice = updateCheck.notice;
|
||||
|
||||
const telemetry = await import('./telemetry/index.js');
|
||||
options.setTelemetryModule?.(telemetry);
|
||||
await telemetry.showTelemetryNoticeIfNeeded(options.io, options.packageInfo);
|
||||
const commandNode = actionCommand as CommandPathNode;
|
||||
const path = commandPath(commandNode);
|
||||
const projectDir = resolveCommandProjectDir(commandNode);
|
||||
const hasProject = ktxYamlExists(projectDir);
|
||||
const attachProjectGroup = shouldAttachCommandProjectGroup(path, hasProject);
|
||||
telemetry.beginCommandSpan({
|
||||
commandPath: path,
|
||||
flagsPresent: collectCommandFlagsPresent(commandNode as unknown as CommandUnknownOpts),
|
||||
flagsPresent: collectCommandFlagsPresent(actionCommand),
|
||||
projectDir: attachProjectGroup ? projectDir : undefined,
|
||||
hasProject,
|
||||
attachProjectGroup,
|
||||
|
|
@ -457,6 +476,13 @@ export function buildKtxProgram(options: BuildKtxProgramOptions): Command {
|
|||
ensureProjectAvailable(options.io, commandNode);
|
||||
});
|
||||
|
||||
program.hook('postAction', () => {
|
||||
if (pendingUpdateNotice) {
|
||||
options.io.stderr.write(pendingUpdateNotice);
|
||||
pendingUpdateNotice = null;
|
||||
}
|
||||
});
|
||||
|
||||
const context: KtxCliCommandContext = {
|
||||
io: options.io,
|
||||
deps: options.deps,
|
||||
|
|
@ -529,7 +555,15 @@ export async function runCommanderKtxCli(
|
|||
try {
|
||||
return await runBareInteractiveCommand(program, io, context);
|
||||
} catch (error) {
|
||||
const telemetry = await import('./telemetry/index.js');
|
||||
await telemetry.reportException({
|
||||
error,
|
||||
context: { source: 'bare-interactive', handled: true, fatal: false },
|
||||
packageInfo: info,
|
||||
io,
|
||||
});
|
||||
io.stderr.write(`${formatCliError(error)}\n`);
|
||||
writeErrorCommunityHint(io, 'error');
|
||||
return 1;
|
||||
}
|
||||
}
|
||||
|
|
@ -554,6 +588,7 @@ export async function runCommanderKtxCli(
|
|||
exitCode = error.exitCode === 0 ? 0 : 1;
|
||||
} else {
|
||||
io.stderr.write(`${formatCliError(error)}\n`);
|
||||
writeErrorCommunityHint(io, 'error');
|
||||
exitCode = 1;
|
||||
}
|
||||
} finally {
|
||||
|
|
@ -563,6 +598,23 @@ export async function runCommanderKtxCli(
|
|||
outcome: commandOutcomeForParseResult(parseError, exitCode),
|
||||
error: parseError,
|
||||
});
|
||||
if (
|
||||
parseError &&
|
||||
!isCommanderExit(parseError) &&
|
||||
!isKtxProjectMissingAbortError(parseError)
|
||||
) {
|
||||
await telemetryModule.reportException({
|
||||
error: parseError,
|
||||
context: {
|
||||
source: completed?.commandPath.join(' ') ?? 'commander parseAsync',
|
||||
handled: true,
|
||||
fatal: false,
|
||||
},
|
||||
projectDir: completed?.projectGroupAttached ? completed.projectDir : undefined,
|
||||
packageInfo: info,
|
||||
io,
|
||||
});
|
||||
}
|
||||
await telemetryModule.emitCompletedCommand({ completed, packageInfo: info, io });
|
||||
await telemetryModule.shutdownTelemetryEmitter();
|
||||
}
|
||||
|
|
|
|||
|
|
@ -12,6 +12,7 @@ import type { KtxSqlArgs } from './sql.js';
|
|||
import { profileMark, profileSpan } from './startup-profile.js';
|
||||
import type { KtxTextIngestArgs } from './text-ingest.js';
|
||||
import { assertCliVersion } from './release-version.js';
|
||||
import { writeErrorCommunityHint } from './community-cta.js';
|
||||
|
||||
profileMark('module:cli-runtime');
|
||||
|
||||
|
|
@ -60,7 +61,7 @@ export function packageInfoFromJson(packageJson: unknown): KtxCliPackageInfo {
|
|||
typeof packageJson.name !== 'string' ||
|
||||
typeof packageJson.version !== 'string'
|
||||
) {
|
||||
throw new Error('Invalid KTX CLI package metadata');
|
||||
throw new Error('Invalid ktx CLI package metadata');
|
||||
}
|
||||
|
||||
return {
|
||||
|
|
@ -76,7 +77,7 @@ async function runInit(args: { projectDir: string; force: boolean }, io: KtxCliI
|
|||
force: args.force,
|
||||
});
|
||||
|
||||
io.stdout.write(`Initialized KTX project at ${result.projectDir}\n`);
|
||||
io.stdout.write(`Initialized ktx project at ${result.projectDir}\n`);
|
||||
io.stdout.write(`Config: ${result.configPath}\n`);
|
||||
io.stdout.write(`Commit: ${result.commitHash ?? 'none'}\n`);
|
||||
return 0;
|
||||
|
|
@ -129,6 +130,54 @@ function installTelemetrySignalFlush(io: KtxCliIo, info: KtxCliPackageInfo): ()
|
|||
};
|
||||
}
|
||||
|
||||
/**
 * Build the reporter used for process-level failures. The returned function
 * lazily loads the telemetry module, reports the error as unhandled and fatal
 * with `immediate: true`, and then shuts the telemetry emitter down.
 *
 * @internal
 */
export function createGlobalExceptionReporter(io: KtxCliIo, info: KtxCliPackageInfo) {
  async function report(
    source: 'uncaughtException' | 'unhandledRejection',
    error: unknown,
  ): Promise<void> {
    const telemetry = await import('./telemetry/index.js');
    await telemetry.reportException({
      error,
      context: { source, handled: false, fatal: true },
      io,
      packageInfo: info,
      immediate: true,
    });
    await telemetry.shutdownTelemetryEmitter();
  }

  return report;
}
|
||||
|
||||
/**
 * Print a process-level failure to stderr: the stack trace when `error` is an
 * `Error` with a stack, otherwise its string form, followed by the community
 * hint for crashes.
 *
 * @internal
 */
export function writeGlobalExceptionToStderr(io: KtxCliIo, error: unknown): void {
  const rendered = error instanceof Error && error.stack ? error.stack : String(error);
  io.stderr.write(`${rendered}\n`);
  writeErrorCommunityHint(io, 'crash');
}
|
||||
|
||||
/**
 * Register `uncaughtException` and `unhandledRejection` listeners on the
 * process. Each listener reports the failure, prints it to stderr, and exits
 * with code 1.
 *
 * @returns A function that removes both listeners.
 */
export function installGlobalExceptionHandlers(io: KtxCliIo, info: KtxCliPackageInfo): () => void {
  const reportFailure = createGlobalExceptionReporter(io, info);

  async function reportThenExit(
    source: 'uncaughtException' | 'unhandledRejection',
    error: unknown,
  ): Promise<void> {
    try {
      await reportFailure(source, error);
    } catch {
      // Best-effort: preserve Node's process termination behavior.
    }
    writeGlobalExceptionToStderr(io, error);
    process.exit(1);
  }

  const onUncaught = (error: Error): void => {
    void reportThenExit('uncaughtException', error);
  };
  const onUnhandled = (reason: unknown): void => {
    void reportThenExit('unhandledRejection', reason);
  };

  process.on('uncaughtException', onUncaught);
  process.on('unhandledRejection', onUnhandled);

  return () => {
    process.off('uncaughtException', onUncaught);
    process.off('unhandledRejection', onUnhandled);
  };
}
|
||||
|
||||
export async function runKtxCli(
|
||||
argv = process.argv.slice(2),
|
||||
io: KtxCliIo = process,
|
||||
|
|
@ -141,11 +190,14 @@ export async function runKtxCli(
|
|||
// Real-process entry only: flush telemetry if interrupted. Test/programmatic
|
||||
// callers pass their own `io`, so they never install process-level handlers.
|
||||
const removeSignalFlush = (io as unknown) === process ? installTelemetrySignalFlush(io, info) : undefined;
|
||||
const removeGlobalExceptionHandlers =
|
||||
(io as unknown) === process ? installGlobalExceptionHandlers(io, info) : undefined;
|
||||
try {
|
||||
return await runCommanderKtxCli(argv, io, deps, info, {
|
||||
runInit: runInitForCommander,
|
||||
});
|
||||
} finally {
|
||||
removeGlobalExceptionHandlers?.();
|
||||
removeSignalFlush?.();
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -37,7 +37,7 @@ export function registerConnectionCommands(program: Command, context: KtxCliComm
|
|||
connection
|
||||
.command('test')
|
||||
.description('Test one or all configured connections (default: all)')
|
||||
.argument('[connectionId]', 'KTX connection id to test (omit to test all)')
|
||||
.argument('[connectionId]', 'ktx connection id to test (omit to test all)')
|
||||
.option('--all', 'Test every configured connection and print a summary list')
|
||||
.action(async (connectionId: string | undefined, options: { all?: boolean }, command) => {
|
||||
if (options.all === true && connectionId !== undefined) {
|
||||
|
|
|
|||
|
|
@ -25,16 +25,16 @@ export function registerIngestCommands(
|
|||
): void {
|
||||
const ingest = program
|
||||
.command('ingest')
|
||||
.description('Build or inspect KTX context, or capture text into memory')
|
||||
.description('Build or inspect ktx context, or capture text into memory')
|
||||
.usage('[options] [connectionId]')
|
||||
.argument('[connectionId]', 'Configured connection id to ingest (omit to ingest all)')
|
||||
.option('--all', 'Ingest all configured connections', false)
|
||||
.addOption(new Option('--query-history', 'Include database query-history usage patterns').conflicts('noQueryHistory'))
|
||||
.addOption(new Option('--no-query-history', 'Skip database query-history usage patterns'))
|
||||
.option('--query-history-window-days <days>', 'Query-history lookback window for this run', parsePositiveIntegerOption)
|
||||
.option('--text <content>', 'Capture inline text into KTX memory; repeatable', collectOption, [])
|
||||
.option('--file <path>', 'Capture a text file into KTX memory; use - for stdin; repeatable', collectOption, [])
|
||||
.option('--connection-id <connectionId>', 'KTX connection id to tag captured text/file notes')
|
||||
.option('--text <content>', 'Capture inline text into ktx memory; repeatable', collectOption, [])
|
||||
.option('--file <path>', 'Capture a text file into ktx memory; use - for stdin; repeatable', collectOption, [])
|
||||
.option('--connection-id <connectionId>', 'ktx connection id to tag captured text/file notes')
|
||||
.option('--user-id <id>', 'Memory user id for text/file capture attribution', 'local-cli')
|
||||
.option('--fail-fast', 'Stop after the first failed text/file item', false)
|
||||
.addOption(new Option('--plain', 'Print plain text output').conflicts(['json']))
|
||||
|
|
|
|||
|
|
@ -27,11 +27,11 @@ function binPath(): string {
|
|||
|
||||
function formatMcpStartResultMessage(input: { status: 'started' | 'already-running'; url: string }): string {
|
||||
return [
|
||||
input.status === 'started' ? `KTX MCP daemon started: ${input.url}` : `KTX MCP daemon already running: ${input.url}`,
|
||||
input.status === 'started' ? `ktx MCP daemon started: ${input.url}` : `ktx MCP daemon already running: ${input.url}`,
|
||||
'',
|
||||
'KTX is ready for configured agents.',
|
||||
'Open your agent for this KTX project and ask a data question, for example:',
|
||||
' "Use KTX to show me the available tables and metrics."',
|
||||
'ktx is ready for configured agents.',
|
||||
'Open your agent for this ktx project and ask a data question, for example:',
|
||||
' "Use ktx to show me the available tables and metrics."',
|
||||
'',
|
||||
].join('\n');
|
||||
}
|
||||
|
|
@ -50,14 +50,14 @@ async function printMcpStatus(context: KtxCliCommandContext, projectDir: string)
|
|||
export function registerMcpCommands(program: Command, context: KtxCliCommandContext): void {
|
||||
const mcp = program
|
||||
.command('mcp')
|
||||
.description('Manage the KTX MCP HTTP server (bare command: show status)')
|
||||
.description('Manage the ktx MCP HTTP server (bare command: show status)')
|
||||
.action(async (_options, command) => {
|
||||
await printMcpStatus(context, resolveCommandProjectDir(command));
|
||||
});
|
||||
|
||||
mcp
|
||||
.command('stdio')
|
||||
.description('Run the KTX MCP server over stdio')
|
||||
.description('Run the ktx MCP server over stdio')
|
||||
.action(async (_options, command) => {
|
||||
await (context.deps.mcp?.runStdioServer ?? runKtxMcpStdioServer)({
|
||||
projectDir: resolveCommandProjectDir(command),
|
||||
|
|
@ -68,7 +68,7 @@ export function registerMcpCommands(program: Command, context: KtxCliCommandCont
|
|||
|
||||
mcp
|
||||
.command('start')
|
||||
.description('Start the KTX MCP HTTP server')
|
||||
.description('Start the ktx MCP HTTP server')
|
||||
.option('--host <host>', 'Host to bind', '127.0.0.1')
|
||||
.option('--port <n>', 'Port to bind', parsePositiveIntegerOption, 7878)
|
||||
.option('--token <token>', 'Bearer token required for non-loopback binding')
|
||||
|
|
@ -96,7 +96,7 @@ export function registerMcpCommands(program: Command, context: KtxCliCommandCont
|
|||
allowedOrigins: options.allowedOrigin,
|
||||
io: context.io,
|
||||
});
|
||||
context.io.stdout.write(`KTX MCP server listening at http://${options.host}:${options.port}/mcp\n`);
|
||||
context.io.stdout.write(`ktx MCP server listening at http://${options.host}:${options.port}/mcp\n`);
|
||||
return;
|
||||
}
|
||||
const result = await (context.deps.mcp?.startDaemon ?? startKtxMcpDaemon)({
|
||||
|
|
@ -114,24 +114,24 @@ export function registerMcpCommands(program: Command, context: KtxCliCommandCont
|
|||
|
||||
mcp
|
||||
.command('stop')
|
||||
.description('Stop the KTX MCP daemon')
|
||||
.description('Stop the ktx MCP daemon')
|
||||
.action(async (_options, command) => {
|
||||
const result = await (context.deps.mcp?.stopDaemon ?? stopKtxMcpDaemon)({
|
||||
projectDir: resolveCommandProjectDir(command),
|
||||
});
|
||||
context.io.stdout.write(result.status === 'stopped' ? 'KTX MCP daemon stopped.\n' : 'KTX MCP daemon is not running.\n');
|
||||
context.io.stdout.write(result.status === 'stopped' ? 'ktx MCP daemon stopped.\n' : 'ktx MCP daemon is not running.\n');
|
||||
});
|
||||
|
||||
mcp
|
||||
.command('status')
|
||||
.description('Show KTX MCP daemon status')
|
||||
.description('Show ktx MCP daemon status')
|
||||
.action(async (_options, command) => {
|
||||
await printMcpStatus(context, resolveCommandProjectDir(command));
|
||||
});
|
||||
|
||||
mcp
|
||||
.command('logs')
|
||||
.description('Print the KTX MCP daemon log')
|
||||
.description('Print the ktx MCP daemon log')
|
||||
.option('--follow', 'Follow log output', false)
|
||||
.action(async (options, command) => {
|
||||
const logPath = mcpDaemonLayout(resolveCommandProjectDir(command)).logPath;
|
||||
|
|
|
|||
|
|
@ -18,7 +18,7 @@ async function runRuntimeArgs(context: KtxCliCommandContext, args: KtxRuntimeArg
|
|||
export function registerRuntimeCommands(program: Command, context: KtxCliCommandContext): void {
|
||||
const runtime = program
|
||||
.command('runtime')
|
||||
.description('Install, start, stop, and inspect the KTX-managed Python runtime')
|
||||
.description('Install, start, stop, and inspect the ktx-managed Python runtime')
|
||||
.showHelpAfterError();
|
||||
|
||||
runtime
|
||||
|
|
@ -38,7 +38,7 @@ export function registerRuntimeCommands(program: Command, context: KtxCliCommand
|
|||
|
||||
runtime
|
||||
.command('start')
|
||||
.description('Start the KTX daemon')
|
||||
.description('Start the ktx daemon')
|
||||
.addOption(createRuntimeFeatureOption())
|
||||
.option('--force', 'Restart even when a matching daemon is already running', false)
|
||||
.action(async (options: { feature: RuntimeFeature; force?: boolean }, command: CommandWithGlobalOptions) => {
|
||||
|
|
@ -53,8 +53,8 @@ export function registerRuntimeCommands(program: Command, context: KtxCliCommand
|
|||
|
||||
runtime
|
||||
.command('stop')
|
||||
.description('Stop the KTX daemon')
|
||||
.option('--all', 'Stop all KTX daemon processes recorded or discoverable on this machine', false)
|
||||
.description('Stop the ktx daemon')
|
||||
.option('--all', 'Stop all ktx daemon processes recorded or discoverable on this machine', false)
|
||||
.action(async (options: { all?: boolean }, command: CommandWithGlobalOptions) => {
|
||||
await runRuntimeArgs(context, {
|
||||
command: 'stop',
|
||||
|
|
|
|||
|
|
@ -2,7 +2,7 @@ import { type Command, InvalidArgumentError, Option } from '@commander-js/extra-
|
|||
import type { KtxCliCommandContext } from '../cli-program.js';
|
||||
import { resolveCommandProjectDir } from '../cli-program.js';
|
||||
import type { KtxSetupDatabaseDriver } from '../setup-databases.js';
|
||||
import type { KtxSetupLlmBackend } from '../setup-models.js';
|
||||
import { isKtxSetupLlmBackend, type KtxSetupLlmBackend } from '../setup-models.js';
|
||||
import type { KtxSetupSourceType } from '../setup-sources.js';
|
||||
|
||||
async function runSetupArgs(
|
||||
|
|
@ -16,7 +16,7 @@ async function runSetupArgs(
|
|||
function positiveInteger(value: string): number {
|
||||
const parsed = Number.parseInt(value, 10);
|
||||
if (!Number.isInteger(parsed) || parsed <= 0) {
|
||||
throw new Error(`Expected a positive integer, received ${value}`);
|
||||
throw new InvalidArgumentError(`Expected a positive integer, received ${value}`);
|
||||
}
|
||||
return parsed;
|
||||
}
|
||||
|
|
@ -29,7 +29,7 @@ function embeddingBackend(value: string): 'openai' | 'sentence-transformers' {
|
|||
}
|
||||
|
||||
function llmBackend(value: string): KtxSetupLlmBackend {
|
||||
if (value === 'anthropic' || value === 'vertex' || value === 'claude-code' || value === 'codex') {
|
||||
if (isKtxSetupLlmBackend(value)) {
|
||||
return value;
|
||||
}
|
||||
throw new InvalidArgumentError(`invalid choice '${value}'`);
|
||||
|
|
@ -57,7 +57,8 @@ function sourceType(value: string): KtxSetupSourceType {
|
|||
value === 'metabase' ||
|
||||
value === 'looker' ||
|
||||
value === 'lookml' ||
|
||||
value === 'notion'
|
||||
value === 'notion' ||
|
||||
value === 'gdrive'
|
||||
) {
|
||||
return value;
|
||||
}
|
||||
|
|
@ -89,13 +90,13 @@ function shouldShowSetupEntryMenu(
|
|||
target?: string;
|
||||
global?: boolean;
|
||||
local?: boolean;
|
||||
installDir?: string;
|
||||
skipAgents?: boolean;
|
||||
yes?: boolean;
|
||||
input?: boolean;
|
||||
llmBackend?: KtxSetupLlmBackend;
|
||||
anthropicApiKeyEnv?: string;
|
||||
anthropicApiKeyFile?: string;
|
||||
llmModel?: string;
|
||||
vertexProject?: string;
|
||||
vertexLocation?: string;
|
||||
skipLlm?: boolean;
|
||||
|
|
@ -132,6 +133,9 @@ function shouldShowSetupEntryMenu(
|
|||
metabaseDatabaseId?: number;
|
||||
notionCrawlMode?: string;
|
||||
notionRootPageId?: string[];
|
||||
gdriveServiceAccountKeyRef?: string;
|
||||
gdriveFolderId?: string;
|
||||
gdriveRecursive?: boolean;
|
||||
skipSources?: boolean;
|
||||
},
|
||||
command: Command,
|
||||
|
|
@ -160,13 +164,13 @@ function shouldShowSetupEntryMenu(
|
|||
'target',
|
||||
'global',
|
||||
'local',
|
||||
'installDir',
|
||||
'skipAgents',
|
||||
'yes',
|
||||
'input',
|
||||
'llmBackend',
|
||||
'anthropicApiKeyEnv',
|
||||
'anthropicApiKeyFile',
|
||||
'llmModel',
|
||||
'vertexProject',
|
||||
'vertexLocation',
|
||||
'skipLlm',
|
||||
|
|
@ -197,6 +201,9 @@ function shouldShowSetupEntryMenu(
|
|||
'sourceTarget',
|
||||
'metabaseDatabaseId',
|
||||
'notionCrawlMode',
|
||||
'gdriveServiceAccountKeyRef',
|
||||
'gdriveFolderId',
|
||||
'gdriveRecursive',
|
||||
'skipSources',
|
||||
].some((optionName) => optionWasSpecified(command, optionName));
|
||||
}
|
||||
|
|
@ -204,8 +211,8 @@ function shouldShowSetupEntryMenu(
|
|||
export function registerSetupCommands(program: Command, context: KtxCliCommandContext): void {
|
||||
const setup = program
|
||||
.command('setup')
|
||||
.description('Set up or resume a local KTX project')
|
||||
.addOption(new Option('--project-dir <path>', 'KTX project directory').hideHelp())
|
||||
.description('Set up or resume a local ktx project')
|
||||
.addOption(new Option('--project-dir <path>', 'ktx project directory').hideHelp())
|
||||
.option('--agents', 'Install agent integration only', false)
|
||||
.addOption(
|
||||
new Option('--target <target>', 'Agent target').choices([
|
||||
|
|
@ -219,6 +226,10 @@ export function registerSetupCommands(program: Command, context: KtxCliCommandCo
|
|||
)
|
||||
.option('--global', 'Install agent integration into the global target scope', false)
|
||||
.option('--local', 'Install Claude Code MCP config into the private per-project ~/.claude.json scope', false)
|
||||
.option(
|
||||
'--install-dir <path>',
|
||||
'Directory to install project-scoped agent config into (defaults to the ktx project directory)',
|
||||
)
|
||||
.addOption(new Option('--skip-agents', 'Leave agent integration incomplete for now').hideHelp().default(false))
|
||||
.option('--yes', 'Accept project creation and runtime install defaults where setup confirms', false)
|
||||
.option('--no-input', 'Disable interactive terminal input')
|
||||
|
|
@ -229,7 +240,6 @@ export function registerSetupCommands(program: Command, context: KtxCliCommandCo
|
|||
.addOption(
|
||||
new Option('--anthropic-api-key-file <path>', 'File containing the Anthropic API key').hideHelp(),
|
||||
)
|
||||
.addOption(new Option('--llm-model <model>', 'LLM model ID or backend model alias').hideHelp())
|
||||
.addOption(new Option('--vertex-project <project>', 'Google Vertex AI project ID, env:NAME, or file:/path').hideHelp())
|
||||
.addOption(new Option('--vertex-location <location>', 'Google Vertex AI location, env:NAME, or file:/path').hideHelp())
|
||||
.addOption(new Option('--skip-llm', 'Leave LLM setup incomplete for now').hideHelp().default(false))
|
||||
|
|
@ -298,7 +308,7 @@ export function registerSetupCommands(program: Command, context: KtxCliCommandCo
|
|||
.hideHelp(),
|
||||
)
|
||||
.addOption(
|
||||
new Option('--skip-databases', 'Leave database setup incomplete; KTX cannot work until a database is added')
|
||||
new Option('--skip-databases', 'Leave database setup incomplete; ktx cannot work until a database is added')
|
||||
.hideHelp()
|
||||
.default(false),
|
||||
)
|
||||
|
|
@ -334,6 +344,12 @@ export function registerSetupCommands(program: Command, context: KtxCliCommandCo
|
|||
.default([] as string[])
|
||||
.hideHelp(),
|
||||
)
|
||||
.addOption(
|
||||
new Option('--gdrive-service-account-key-ref <ref>', 'file: reference to a Google service account JSON key')
|
||||
.hideHelp(),
|
||||
)
|
||||
.addOption(new Option('--gdrive-folder-id <id>', 'Google Drive folder id to ingest').hideHelp())
|
||||
.addOption(new Option('--gdrive-recursive', 'Recursively traverse Google Drive subfolders').hideHelp().default(false))
|
||||
.addOption(new Option('--skip-sources', 'Mark optional source setup complete with no sources').hideHelp().default(false))
|
||||
.showHelpAfterError();
|
||||
|
||||
|
|
@ -397,6 +413,16 @@ export function registerSetupCommands(program: Command, context: KtxCliCommandCo
|
|||
context.setExitCode(1);
|
||||
return;
|
||||
}
|
||||
if (options.installDir && (options.global || options.local)) {
|
||||
context.io.stderr.write('Choose either --install-dir or a scope flag (--global / --local), not both.\n');
|
||||
context.setExitCode(1);
|
||||
return;
|
||||
}
|
||||
if (options.installDir && options.target === 'claude-desktop') {
|
||||
context.io.stderr.write('--install-dir does not apply to --target claude-desktop, which is always global.\n');
|
||||
context.setExitCode(1);
|
||||
return;
|
||||
}
|
||||
|
||||
const creatingDatabaseConnection = options.database.length > 0 || options.databaseUrl !== undefined;
|
||||
if (creatingDatabaseConnection && options.databaseConnectionId.length > 1) {
|
||||
|
|
@ -406,6 +432,8 @@ export function registerSetupCommands(program: Command, context: KtxCliCommandCo
|
|||
}
|
||||
|
||||
const resolvedAgentScope = options.local ? 'local' : options.global ? 'global' : 'project';
|
||||
const debugEnabled =
|
||||
((command.optsWithGlobals ? command.optsWithGlobals() : command.opts()) as { debug?: unknown }).debug === true;
|
||||
await runSetupArgs(context, {
|
||||
command: 'run',
|
||||
projectDir: resolveCommandProjectDir(command),
|
||||
|
|
@ -413,14 +441,15 @@ export function registerSetupCommands(program: Command, context: KtxCliCommandCo
|
|||
agents: options.agents === true,
|
||||
...(options.target ? { target: options.target } : {}),
|
||||
agentScope: resolvedAgentScope,
|
||||
...(options.installDir ? { installRoot: options.installDir } : {}),
|
||||
skipAgents: options.skipAgents === true,
|
||||
inputMode: options.input === false ? 'disabled' : 'auto',
|
||||
...(debugEnabled ? { debug: true } : {}),
|
||||
yes: options.yes === true,
|
||||
cliVersion: context.packageInfo.version,
|
||||
...(options.llmBackend ? { llmBackend: options.llmBackend } : {}),
|
||||
...(options.anthropicApiKeyEnv ? { anthropicApiKeyEnv: options.anthropicApiKeyEnv } : {}),
|
||||
...(options.anthropicApiKeyFile ? { anthropicApiKeyFile: options.anthropicApiKeyFile } : {}),
|
||||
...(options.llmModel ? { llmModel: options.llmModel } : {}),
|
||||
...(options.vertexProject ? { vertexProject: options.vertexProject } : {}),
|
||||
...(options.vertexLocation ? { vertexLocation: options.vertexLocation } : {}),
|
||||
skipLlm: options.skipLlm === true,
|
||||
|
|
@ -470,6 +499,11 @@ export function registerSetupCommands(program: Command, context: KtxCliCommandCo
|
|||
...(options.metabaseDatabaseId !== undefined ? { metabaseDatabaseId: options.metabaseDatabaseId } : {}),
|
||||
...(options.notionCrawlMode ? { notionCrawlMode: options.notionCrawlMode } : {}),
|
||||
...(options.notionRootPageId.length > 0 ? { notionRootPageIds: options.notionRootPageId } : {}),
|
||||
...(options.gdriveServiceAccountKeyRef
|
||||
? { gdriveServiceAccountKeyRef: options.gdriveServiceAccountKeyRef }
|
||||
: {}),
|
||||
...(options.gdriveFolderId ? { gdriveFolderId: options.gdriveFolderId } : {}),
|
||||
...(options.gdriveRecursive ? { gdriveRecursive: true } : {}),
|
||||
runInitialSourceIngest: false,
|
||||
skipSources: options.skipSources === true,
|
||||
showEntryMenu: shouldShowSetupEntryMenu(options, command),
|
||||
|
|
|
|||
|
|
@ -44,7 +44,7 @@ export function registerSlCommands(program: Command, context: KtxCliCommandConte
|
|||
.description('List, search, validate, or query local semantic-layer sources')
|
||||
.usage('[options] [query...]')
|
||||
.argument('[query...]', 'Search query; omit to list all sources')
|
||||
.option('--connection-id <id>', 'KTX connection id')
|
||||
.option('--connection-id <id>', 'ktx connection id')
|
||||
.option('--limit <number>', 'Maximum search results (search mode only)', parsePositiveIntegerOption)
|
||||
.addOption(
|
||||
new Option('--output <mode>', 'Output mode: pretty (default in TTY), plain (TSV), or json').choices([
|
||||
|
|
|
|||
|
|
@ -26,7 +26,7 @@ export function registerSqlCommands(program: Command, context: KtxCliCommandCont
|
|||
.command('sql')
|
||||
.description('Execute parser-validated read-only SQL against a configured connection')
|
||||
.argument('<sql...>', 'SQL query to execute')
|
||||
.requiredOption('-c, --connection <id>', 'KTX connection id')
|
||||
.requiredOption('-c, --connection <id>', 'ktx connection id')
|
||||
.option('--max-rows <n>', 'Maximum rows to return', parseSqlMaxRowsOption, DEFAULT_MAX_ROWS)
|
||||
.addOption(
|
||||
new Option('--output <mode>', 'Output mode: pretty (default), plain (TSV), or json').choices([
|
||||
|
|
|
|||
|
|
@ -15,7 +15,7 @@ function inputMode(options: { input?: boolean }): { inputMode?: 'disabled' } {
|
|||
export function registerStatusCommands(program: Command, context: KtxCliCommandContext): void {
|
||||
program
|
||||
.command('status')
|
||||
.description('Check current KTX setup and project readiness')
|
||||
.description('Check current ktx setup and project readiness')
|
||||
.option('--json', 'Print JSON output', false)
|
||||
.option('-v, --verbose', 'Show every check, including passing ones', false)
|
||||
.option('--validate', 'Only validate the ktx.yaml schema; skip readiness checks', false)
|
||||
|
|
|
|||
28
packages/cli/src/community-cta.ts
Normal file
28
packages/cli/src/community-cta.ts
Normal file
|
|
@ -0,0 +1,28 @@
|
|||
import type { KtxCliIo } from './cli-runtime.js';
|
||||
import { isWritableTtyOutput } from './io/tty.js';
|
||||
import { dim } from './io/symbols.js';
|
||||
import { SLACK_URL } from './links.js';
|
||||
|
||||
type ErrorCtaVariant = 'error' | 'crash';
|
||||
|
||||
/** @internal */
|
||||
export const SLACK_HELP_FOOTER = `Community & support: ${SLACK_URL}`;
|
||||
|
||||
/** @internal */
|
||||
export const SLACK_SETUP_NOTE = {
|
||||
title: 'Community',
|
||||
body: `Questions or feedback? Join the ktx Slack: ${SLACK_URL}`,
|
||||
} as const;
|
||||
|
||||
export function writeErrorCommunityHint(io: KtxCliIo, variant: ErrorCtaVariant): void {
|
||||
if (!isWritableTtyOutput(io.stderr)) {
|
||||
return;
|
||||
}
|
||||
|
||||
const line =
|
||||
variant === 'crash'
|
||||
? `This may be a bug - report it or ask in the ktx community: ${SLACK_URL}`
|
||||
: `Stuck? The ktx community can help: ${SLACK_URL}`;
|
||||
|
||||
io.stderr.write(`${dim(line)}\n`);
|
||||
}
|
||||
|
|
@ -3,9 +3,13 @@ import { DefaultLookerConnectionClientFactory } from './context/ingest/adapters/
|
|||
import type { LookerClient } from './context/ingest/adapters/looker/client.js';
|
||||
import type { MetabaseRuntimeClient } from './context/ingest/adapters/metabase/client-port.js';
|
||||
import { type NotionBotInfo, NotionClient } from './context/ingest/adapters/notion/notion-client.js';
|
||||
import { parseGdriveConnectionConfig, resolveGdriveServiceAccountKey } from './context/connections/gdrive-config.js';
|
||||
import { createLocalLookerCredentialResolver } from './context/ingest/adapters/looker/local-looker.adapter.js';
|
||||
import { metabaseRuntimeConfigFromLocalConnection } from './context/ingest/adapters/metabase/local-metabase.adapter.js';
|
||||
import { createGoogleDocsClients, verifyGdriveFolderAndCountDocs } from './context/ingest/adapters/gdrive/gdrive-client.js';
|
||||
import { gdriveServiceAccountKeySchema } from './context/ingest/adapters/gdrive/types.js';
|
||||
import { testRepoConnection } from './context/ingest/repo-fetch.js';
|
||||
import { federatedConnectionListing } from './context/connections/federation.js';
|
||||
import { getDriverRegistration } from './context/connections/drivers.js';
|
||||
import { parseNotionConnectionConfig, resolveNotionConnectionAuthToken } from './context/connections/notion-config.js';
|
||||
import { resolveKtxConfigReference } from './context/core/config-reference.js';
|
||||
|
|
@ -16,7 +20,8 @@ import { bold, dim, green, red, SYMBOLS } from './io/symbols.js';
|
|||
import { createKtxCliScanConnector } from './local-scan-connectors.js';
|
||||
import { profileMark } from './startup-profile.js';
|
||||
import { isDemoConnection } from './telemetry/demo-detect.js';
|
||||
import { emitTelemetryEvent } from './telemetry/index.js';
|
||||
import { emitTelemetryEvent, reportException } from './telemetry/index.js';
|
||||
import { collectTelemetryRedactionSecrets } from './telemetry/redaction-secrets.js';
|
||||
import { formatErrorDetail, scrubErrorClass } from './telemetry/scrubber.js';
|
||||
|
||||
profileMark('module:connection');
|
||||
|
|
@ -29,6 +34,10 @@ export type KtxConnectionArgs =
|
|||
type MetabaseTestPort = Pick<MetabaseRuntimeClient, 'testConnection' | 'getDatabases' | 'cleanup'>;
|
||||
type LookerTestPort = Pick<LookerClient, 'testConnection'>;
|
||||
type NotionTestPort = Pick<NotionClient, 'retrieveBotUser'>;
|
||||
type GdriveTestPort = Pick<
|
||||
ReturnType<typeof createGoogleDocsClients>['drive'],
|
||||
'listFiles' | 'getFile'
|
||||
>;
|
||||
type TestRepoConnection = typeof testRepoConnection;
|
||||
|
||||
export interface KtxConnectionDeps {
|
||||
|
|
@ -36,6 +45,7 @@ export interface KtxConnectionDeps {
|
|||
createMetabaseClient?: (project: KtxLocalProject, connectionId: string) => Promise<MetabaseTestPort>;
|
||||
createLookerClient?: (project: KtxLocalProject, connectionId: string) => Promise<LookerTestPort>;
|
||||
createNotionClient?: (project: KtxLocalProject, connectionId: string) => Promise<NotionTestPort>;
|
||||
createGdriveClient?: (project: KtxLocalProject, connectionId: string) => Promise<GdriveTestPort>;
|
||||
testRepoConnection?: TestRepoConnection;
|
||||
}
|
||||
|
||||
|
|
@ -50,6 +60,7 @@ const SUPPORTED_TEST_DRIVERS = [
|
|||
'metabase',
|
||||
'looker',
|
||||
'notion',
|
||||
'gdrive',
|
||||
'dbt',
|
||||
'metricflow',
|
||||
'lookml',
|
||||
|
|
@ -74,6 +85,12 @@ async function testNativeConnection(
|
|||
}
|
||||
const result = await connector.testConnection();
|
||||
if (!result.success) {
|
||||
// Re-throw the driver's original error so connection_test telemetry records
|
||||
// its real class (e.g. ConnectionError) and code (e.g. ELOGIN) instead of
|
||||
// collapsing every native failure to a generic Error with no code.
|
||||
if (result.cause instanceof Error) {
|
||||
throw result.cause;
|
||||
}
|
||||
throw new Error(result.error ?? 'connection test failed');
|
||||
}
|
||||
return { driver: connector.driver };
|
||||
|
|
@ -127,7 +144,7 @@ async function createDefaultLookerClient(
|
|||
connectionId: string,
|
||||
): Promise<LookerTestPort> {
|
||||
const factory = new DefaultLookerConnectionClientFactory(createLocalLookerCredentialResolver(project));
|
||||
return (await factory.createClient(connectionId)) as unknown as LookerTestPort;
|
||||
return factory.createLookerClient(connectionId);
|
||||
}
|
||||
|
||||
async function testLookerConnection(
|
||||
|
|
@ -175,6 +192,34 @@ async function testNotionConnection(
|
|||
return { bot: describeNotionBot(bot) };
|
||||
}
|
||||
|
||||
async function createDefaultGdriveClient(
|
||||
project: KtxLocalProject,
|
||||
connectionId: string,
|
||||
): Promise<GdriveTestPort> {
|
||||
const connection = project.config.connections[connectionId];
|
||||
if (!connection) {
|
||||
throw new Error(`Connection "${connectionId}" is not configured in ktx.yaml`);
|
||||
}
|
||||
const parsed = parseGdriveConnectionConfig(connection);
|
||||
const keyText = await resolveGdriveServiceAccountKey(parsed.service_account_key_ref);
|
||||
const key = gdriveServiceAccountKeySchema.parse(JSON.parse(keyText));
|
||||
return createGoogleDocsClients(key).drive;
|
||||
}
|
||||
|
||||
async function testGdriveConnection(
|
||||
project: KtxLocalProject,
|
||||
connectionId: string,
|
||||
createClient: (project: KtxLocalProject, connectionId: string) => Promise<GdriveTestPort>,
|
||||
): Promise<{ docs: number }> {
|
||||
const connection = project.config.connections[connectionId];
|
||||
if (!connection) {
|
||||
throw new Error(`Connection "${connectionId}" is not configured in ktx.yaml`);
|
||||
}
|
||||
const parsed = parseGdriveConnectionConfig(connection);
|
||||
const client = await createClient(project, connectionId);
|
||||
return { docs: await verifyGdriveFolderAndCountDocs(client, parsed.folder_id) };
|
||||
}
|
||||
|
||||
interface GitConnectionFields {
|
||||
repoUrl: string;
|
||||
authToken: string | null;
|
||||
|
|
@ -263,6 +308,15 @@ async function testConnectionByDriver(
|
|||
return { driver, detailKey: 'Bot', detailValue: result.bot };
|
||||
}
|
||||
|
||||
if (driver === 'gdrive') {
|
||||
const result = await testGdriveConnection(
|
||||
project,
|
||||
connectionId,
|
||||
deps.createGdriveClient ?? createDefaultGdriveClient,
|
||||
);
|
||||
return { driver, detailKey: 'Docs', detailValue: String(result.docs) };
|
||||
}
|
||||
|
||||
if (driver === 'dbt' || driver === 'metricflow' || driver === 'lookml') {
|
||||
const result = await testGitRepoConnection(
|
||||
project,
|
||||
|
|
@ -318,6 +372,21 @@ async function emitConnectionTest(input: {
|
|||
...(errorDetail ? { errorDetail } : {}),
|
||||
},
|
||||
});
|
||||
if (input.error) {
|
||||
await reportException({
|
||||
error: input.error,
|
||||
context: { source: 'connection test', handled: true, fatal: false },
|
||||
projectDir: input.project.projectDir,
|
||||
io: input.io,
|
||||
redactionSecrets: await collectTelemetryRedactionSecrets({
|
||||
project: input.project,
|
||||
connectionId: input.connectionId,
|
||||
includeLlm: false,
|
||||
includeEmbeddings: false,
|
||||
env: process.env,
|
||||
}),
|
||||
});
|
||||
}
|
||||
}
|
||||
|
||||
function visualWidth(text: string): number {
|
||||
|
|
@ -425,15 +494,23 @@ export async function runKtxConnection(
|
|||
io.stdout.write('No connections configured. Run `ktx setup` to add one.\n');
|
||||
return 0;
|
||||
}
|
||||
const idWidth = Math.max('ID'.length, ...entries.map(([id]) => id.length));
|
||||
const driverWidth = Math.max(
|
||||
'DRIVER'.length,
|
||||
const federated = federatedConnectionListing(project.config.connections, args.projectDir);
|
||||
const idCandidates = [...entries.map(([id]) => id), ...(federated ? [federated.id] : [])];
|
||||
const driverLengths = [
|
||||
...entries.map(([, c]) => (c.driver ?? 'unknown').length),
|
||||
);
|
||||
...(federated ? [federated.driver.length] : []),
|
||||
];
|
||||
const idWidth = Math.max('ID'.length, ...idCandidates.map((id) => id.length));
|
||||
const driverWidth = Math.max('DRIVER'.length, ...driverLengths);
|
||||
io.stdout.write(`${'ID'.padEnd(idWidth)} ${'DRIVER'.padEnd(driverWidth)}\n`);
|
||||
for (const [id, connection] of entries) {
|
||||
io.stdout.write(`${id.padEnd(idWidth)} ${(connection.driver ?? 'unknown').padEnd(driverWidth)}\n`);
|
||||
}
|
||||
if (federated) {
|
||||
io.stdout.write(`${federated.id.padEnd(idWidth)} ${federated.driver.padEnd(driverWidth)}\n`);
|
||||
io.stdout.write(` federates: ${federated.members.join(', ')}\n`);
|
||||
io.stdout.write(` ${federated.hint}\n`);
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -5,7 +5,9 @@ import { assertReadOnlySql, limitSqlForExecution } from '../../context/connectio
|
|||
import { tryConstraintQuery } from '../../context/scan/constraint-discovery.js';
|
||||
import { scopedTableNames } from '../../context/scan/table-ref.js';
|
||||
import {
|
||||
connectorTestFailure,
|
||||
createKtxConnectorCapabilities,
|
||||
type KtxConnectorTestResult,
|
||||
type KtxColumnSampleInput,
|
||||
type KtxColumnSampleResult,
|
||||
type KtxColumnStatsInput,
|
||||
|
|
@ -24,9 +26,7 @@ import {
|
|||
type KtxTableSampleInput,
|
||||
type KtxTableSampleResult,
|
||||
} from '../../context/scan/types.js';
|
||||
import { readFileSync } from 'node:fs';
|
||||
import { homedir } from 'node:os';
|
||||
import { resolve } from 'node:path';
|
||||
import { resolveStringReference } from '../shared/string-reference.js';
|
||||
|
||||
export interface KtxBigQueryConnectionConfig {
|
||||
driver?: string;
|
||||
|
|
@ -136,18 +136,6 @@ class DefaultBigQueryClientFactory implements KtxBigQueryClientFactory {
|
|||
}
|
||||
}
|
||||
|
||||
function resolveStringReference(value: string, env: NodeJS.ProcessEnv): string {
|
||||
if (value.startsWith('env:')) {
|
||||
return env[value.slice('env:'.length)] ?? '';
|
||||
}
|
||||
if (value.startsWith('file:')) {
|
||||
const rawPath = value.slice('file:'.length);
|
||||
const path = rawPath.startsWith('~') ? resolve(homedir(), rawPath.slice(1)) : rawPath;
|
||||
return readFileSync(path, 'utf-8').trim();
|
||||
}
|
||||
return value;
|
||||
}
|
||||
|
||||
function stringConfigValue(
|
||||
connection: KtxBigQueryConnectionConfig | undefined,
|
||||
key: keyof KtxBigQueryConnectionConfig,
|
||||
|
|
@ -320,7 +308,7 @@ export class KtxBigQueryScanConnector implements KtxScanConnector {
|
|||
this.id = `bigquery:${options.connectionId}`;
|
||||
}
|
||||
|
||||
async testConnection(): Promise<{ success: boolean; error?: string }> {
|
||||
async testConnection(): Promise<KtxConnectorTestResult> {
|
||||
try {
|
||||
const client = this.getClient();
|
||||
await client.getDatasets({ maxResults: 1 });
|
||||
|
|
@ -329,7 +317,7 @@ export class KtxBigQueryScanConnector implements KtxScanConnector {
|
|||
}
|
||||
return { success: true };
|
||||
} catch (error) {
|
||||
return { success: false, error: error instanceof Error ? error.message : String(error) };
|
||||
return connectorTestFailure(error);
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -1,12 +1,10 @@
|
|||
import { createClient } from '@clickhouse/client';
|
||||
import { getDialectForDriver } from '../../context/connections/dialects.js';
|
||||
import { assertReadOnlySql, limitSqlForExecution } from '../../context/connections/read-only-sql.js';
|
||||
import { createKtxConnectorCapabilities, type KtxColumnSampleInput, type KtxColumnSampleResult, type KtxColumnStatsInput, type KtxColumnStatsResult, type KtxQueryResult, type KtxReadOnlyQueryInput, type KtxScanConnector, type KtxScanContext, type KtxScanInput, type KtxSchemaColumn, type KtxSchemaSnapshot, type KtxSchemaTable, type KtxTableRef, type KtxTableSampleInput, type KtxTableListEntry, type KtxTableSampleResult } from '../../context/scan/types.js';
|
||||
import { connectorTestFailure, createKtxConnectorCapabilities, type KtxConnectorTestResult, type KtxColumnSampleInput, type KtxColumnSampleResult, type KtxColumnStatsInput, type KtxColumnStatsResult, type KtxQueryResult, type KtxReadOnlyQueryInput, type KtxScanConnector, type KtxScanContext, type KtxScanInput, type KtxSchemaColumn, type KtxSchemaSnapshot, type KtxSchemaTable, type KtxTableRef, type KtxTableSampleInput, type KtxTableListEntry, type KtxTableSampleResult } from '../../context/scan/types.js';
|
||||
import { scopedTableNames } from '../../context/scan/table-ref.js';
|
||||
import { readFileSync } from 'node:fs';
|
||||
import { resolveStringReference } from '../shared/string-reference.js';
|
||||
import { Agent as HttpsAgent } from 'node:https';
|
||||
import { homedir } from 'node:os';
|
||||
import { resolve } from 'node:path';
|
||||
|
||||
export interface KtxClickHouseConnectionConfig {
|
||||
driver?: string;
|
||||
|
|
@ -142,19 +140,6 @@ function stringConfigValue(
|
|||
return typeof value === 'string' && value.trim().length > 0 ? resolveStringReference(value.trim(), env) : undefined;
|
||||
}
|
||||
|
||||
function resolveStringReference(value: string, env: NodeJS.ProcessEnv): string {
|
||||
if (value.startsWith('env:')) {
|
||||
const envName = value.slice('env:'.length);
|
||||
return env[envName] ?? '';
|
||||
}
|
||||
if (value.startsWith('file:')) {
|
||||
const rawPath = value.slice('file:'.length);
|
||||
const path = rawPath.startsWith('~') ? resolve(homedir(), rawPath.slice(1)) : rawPath;
|
||||
return readFileSync(path, 'utf-8').trim();
|
||||
}
|
||||
return value;
|
||||
}
|
||||
|
||||
function maybeNumber(value: unknown): number | undefined {
|
||||
return typeof value === 'number' && Number.isFinite(value) ? value : undefined;
|
||||
}
|
||||
|
|
@ -317,12 +302,12 @@ export class KtxClickHouseScanConnector implements KtxScanConnector {
|
|||
this.id = `clickhouse:${options.connectionId}`;
|
||||
}
|
||||
|
||||
async testConnection(): Promise<{ success: boolean; error?: string }> {
|
||||
async testConnection(): Promise<KtxConnectorTestResult> {
|
||||
try {
|
||||
await this.query('SELECT 1');
|
||||
return { success: true };
|
||||
} catch (error) {
|
||||
return { success: false, error: error instanceof Error ? error.message : String(error) };
|
||||
return connectorTestFailure(error);
|
||||
}
|
||||
}
|
||||
|
||||
|
|
@ -645,7 +630,7 @@ export class KtxClickHouseScanConnector implements KtxScanConnector {
|
|||
|
||||
private assertConnection(connectionId: string): void {
|
||||
if (connectionId !== this.connectionId) {
|
||||
throw new Error(`KTX ClickHouse connector ${this.id} cannot serve connection ${connectionId}`);
|
||||
throw new Error(`ktx ClickHouse connector ${this.id} cannot serve connection ${connectionId}`);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
|||
90
packages/cli/src/connectors/duckdb/federated-attach.ts
Normal file
90
packages/cli/src/connectors/duckdb/federated-attach.ts
Normal file
|
|
@ -0,0 +1,90 @@
|
|||
import { sqliteDatabasePathFromConfig, type KtxSqliteConnectionConfig } from '../sqlite/connector.js';
|
||||
import { postgresPoolConfigFromConfig, type KtxPostgresConnectionConfig } from '../postgres/connector.js';
|
||||
import {
|
||||
mysqlConnectionPoolConfigFromConfig,
|
||||
type KtxMysqlConnectionConfig,
|
||||
} from '../mysql/connector.js';
|
||||
import type { FederatedMember } from '../../context/connections/federation.js';
|
||||
|
||||
/**
 * Renders a value for a `key=value` connection string (libpq-style syntax).
 *
 * A value that is empty, or that contains whitespace, a single quote, or a
 * backslash, is wrapped in single quotes with `\` and `'` backslash-escaped.
 * Any other value is returned unchanged.
 *
 * @param value - Raw parameter value.
 * @returns The value, quoted and escaped when required.
 */
function kvKeyword(value: string): string {
  // An empty value must be written as '' — a bare `key=` carries no value token.
  const needsQuoting = value === '' || /[\s'\\]/.test(value);
  if (!needsQuoting) {
    return value;
  }
  // Escape backslashes first so the backslashes added for quotes are not doubled.
  const escaped = value.replace(/\\/g, '\\\\').replace(/'/g, "\\'");
  return `'${escaped}'`;
}
|
||||
|
||||
/**
 * Ensures a connection URL carries an `sslmode` query parameter.
 *
 * The URL is handed to DuckDB as-is, so an ssl-enabled member needs the mode
 * in the URL itself. If the URL already specifies `sslmode`, the input string
 * is returned untouched; otherwise `sslmode=require` is added.
 *
 * @param connectionString - Connection URL; must be parseable by `URL`.
 * @returns The original string, or the URL with `sslmode=require` added.
 */
function withRequiredSslMode(connectionString: string): string {
  const parsed = new URL(connectionString);
  const query = parsed.searchParams;
  if (!query.has('sslmode')) {
    query.set('sslmode', 'require');
    return parsed.toString();
  }
  return connectionString;
}
|
||||
|
||||
/**
 * Builds the Postgres connection target for a federated member.
 *
 * The member's config is resolved through `postgresPoolConfigFromConfig`. When
 * that yields a connection string it is used directly (with `sslmode` ensured
 * if ssl is enabled); otherwise a space-separated `key=value` string is
 * assembled from the resolved fields that are set.
 *
 * @param member - Federated member whose connection config is resolved.
 * @param env - Environment passed to the config resolver.
 * @returns A connection URL or a `key=value` connection string.
 */
function postgresAttachString(member: FederatedMember, env: NodeJS.ProcessEnv): string {
  const pool = postgresPoolConfigFromConfig({
    connectionId: member.connectionId,
    connection: member.connection as KtxPostgresConnectionConfig,
    env,
  });

  const { connectionString } = pool;
  if (connectionString) {
    if (!pool.ssl) {
      return connectionString;
    }
    return withRequiredSslMode(connectionString);
  }

  const segments: string[] = [];
  if (pool.host) {
    segments.push(`host=${kvKeyword(pool.host)}`);
  }
  if (pool.port) {
    segments.push(`port=${pool.port}`);
  }
  if (pool.database) {
    segments.push(`dbname=${kvKeyword(pool.database)}`);
  }
  if (pool.user) {
    segments.push(`user=${kvKeyword(pool.user)}`);
  }
  if (pool.password) {
    segments.push(`password=${kvKeyword(pool.password)}`);
  }
  if (pool.ssl) {
    segments.push('sslmode=require');
  }
  if (pool.options) {
    segments.push(`options=${kvKeyword(pool.options)}`);
  }
  return segments.join(' ');
}
|
||||
|
||||
/**
 * Builds the MySQL `key=value` connection string for a federated member.
 *
 * Host, port, database and user are always emitted; the password and
 * `ssl_mode=REQUIRED` are appended only when the resolved config sets them.
 *
 * @param member - Federated member whose connection config is resolved.
 * @param env - Environment passed to the config resolver.
 * @returns Space-separated `key=value` connection string.
 */
function mysqlAttachString(member: FederatedMember, env: NodeJS.ProcessEnv): string {
  const pool = mysqlConnectionPoolConfigFromConfig({
    connectionId: member.connectionId,
    connection: member.connection as KtxMysqlConnectionConfig,
    env,
  });

  const segments: string[] = [];
  segments.push(`host=${kvKeyword(pool.host)}`);
  segments.push(`port=${pool.port}`);
  segments.push(`database=${kvKeyword(pool.database)}`);
  segments.push(`user=${kvKeyword(pool.user)}`);

  if (pool.password) {
    segments.push(`password=${kvKeyword(pool.password)}`);
  }
  if (pool.ssl) {
    segments.push('ssl_mode=REQUIRED');
  }
  return segments.join(' ');
}
|
||||
|
||||
/**
 * Turns a federated member's ktx.yaml config into the target string that
 * DuckDB's ATTACH expects for the member's driver. Each driver delegates to
 * its connector's own config resolver, so federated and standalone scans
 * interpret the config the same way.
 *
 * @param member - Federated member to attach.
 * @param env - Environment passed to the Postgres/MySQL config resolvers.
 * @returns A SQLite database path, or a Postgres/MySQL connection string.
 * @throws Error when the member's driver is not sqlite, postgres, or mysql.
 */
export function federatedAttachTarget(member: FederatedMember, env: NodeJS.ProcessEnv): string {
  // Driver names are matched case-insensitively.
  const driver = member.driver.toLowerCase();

  if (driver === 'sqlite') {
    return sqliteDatabasePathFromConfig({
      connectionId: member.connectionId,
      projectDir: member.projectDir,
      connection: member.connection as KtxSqliteConnectionConfig,
    });
  }
  if (driver === 'postgres') {
    return postgresAttachString(member, env);
  }
  if (driver === 'mysql') {
    return mysqlAttachString(member, env);
  }
  throw new Error(`Driver "${member.driver}" cannot be attached by DuckDB federation.`);
}
|
||||
78
packages/cli/src/connectors/duckdb/federated-executor.ts
Normal file
78
packages/cli/src/connectors/duckdb/federated-executor.ts
Normal file
|
|
@ -0,0 +1,78 @@
|
|||
import { DuckDBInstance } from '@duckdb/node-api';
|
||||
import { federatedAttachTarget } from './federated-attach.js';
|
||||
import type {
|
||||
KtxSqlQueryExecutionInput,
|
||||
KtxSqlQueryExecutionResult,
|
||||
} from '../../context/connections/query-executor.js';
|
||||
import { normalizeQueryRows } from '../../context/connections/query-executor.js';
|
||||
import { assertReadOnlySql, limitSqlForExecution } from '../../context/connections/read-only-sql.js';
|
||||
import { attachTypeForDriver, type FederatedMember } from '../../context/connections/federation.js';
|
||||
|
||||
/**
 * Wraps an identifier in double quotes, doubling any embedded double quote.
 *
 * @param id - Raw identifier.
 * @returns The double-quoted identifier.
 */
function quoteDuckdbIdentifier(id: string): string {
  const doubled = id.split('"').join('""');
  return '"' + doubled + '"';
}
|
||||
|
||||
const MIN_SAFE_BIGINT = BigInt(Number.MIN_SAFE_INTEGER);
const MAX_SAFE_BIGINT = BigInt(Number.MAX_SAFE_INTEGER);

/**
 * Converts a bigint into a JSON-serializable value.
 *
 * Values inside Number's safe-integer range become a `number`. Anything
 * outside it becomes its decimal string, so the exact value is kept rather
 * than being rounded.
 *
 * @param value - Bigint cell value.
 * @returns A `number` when safely representable, otherwise a decimal string.
 */
function jsonSafeBigint(value: bigint): number | string {
  const outsideSafeRange = value < MIN_SAFE_BIGINT || value > MAX_SAFE_BIGINT;
  if (outsideSafeRange) {
    return value.toString();
  }
  return Number(value);
}
|
||||
|
||||
/**
 * Returns a copy of the row matrix in which every bigint cell has been passed
 * through `jsonSafeBigint`; all other cells are carried over unchanged.
 *
 * @param rows - Result rows, one array of cells per row.
 * @returns New row arrays with bigint cells converted.
 */
function toJsonSafeRows(rows: unknown[][]): unknown[][] {
  const convertCell = (cell: unknown): unknown => {
    if (typeof cell === 'bigint') {
      return jsonSafeBigint(cell);
    }
    return cell;
  };
  return rows.map((row) => row.map((cell) => convertCell(cell)));
}
|
||||
|
||||
/**
 * Builds the SQL statements that prepare a DuckDB session for federation:
 * one `INSTALL …; LOAD …;` per distinct attach type (in first-seen order),
 * followed by one read-only `ATTACH` per member, aliased by its connection id.
 *
 * @internal
 * @param members - Federated members to attach.
 * @param env - Environment passed through when resolving each member's target.
 * @returns Load statements followed by attach statements.
 */
export function buildAttachStatements(members: FederatedMember[], env: NodeJS.ProcessEnv): string[] {
  const resolved = members.map((member) => {
    const type = attachTypeForDriver(member.driver);
    const url = federatedAttachTarget(member, env);
    return { type, url, alias: member.connectionId };
  });

  const statements: string[] = [];

  // A Set iterates in insertion order, so extensions load in first-seen order.
  const distinctTypes = new Set(resolved.map((entry) => entry.type));
  for (const type of distinctTypes) {
    statements.push(`INSTALL ${type}; LOAD ${type};`);
  }

  for (const entry of resolved) {
    // The target sits inside a single-quoted SQL literal: double embedded quotes.
    const literal = entry.url.replace(/'/g, "''");
    statements.push(
      `ATTACH '${literal}' AS ${quoteDuckdbIdentifier(entry.alias)} (TYPE ${entry.type}, READ_ONLY);`,
    );
  }

  return statements;
}
|
||||
|
||||
/**
 * Runs a read-only query across federated members using a throwaway in-memory
 * DuckDB instance.
 *
 * The SQL is validated and row-limited, and the attach statements are built,
 * before any DuckDB resource is created. The connection and the instance are
 * always closed, whether the query succeeds or throws.
 *
 * @param members - Federated members to attach before running the query.
 * @param input - Query input; `sql` and `maxRows` are read.
 * @param env - Environment used to resolve member configs (defaults to `process.env`).
 * @returns Column headers, JSON-safe rows, and row counts for the query.
 */
export async function executeFederatedQuery(
  members: FederatedMember[],
  input: KtxSqlQueryExecutionInput,
  env: NodeJS.ProcessEnv = process.env,
): Promise<KtxSqlQueryExecutionResult> {
  const readOnlySql = assertReadOnlySql(input.sql);
  const boundedSql = limitSqlForExecution(readOnlySql, input.maxRows);
  const setupStatements = buildAttachStatements(members, env);

  const db = await DuckDBInstance.create(':memory:');
  try {
    const session = await db.connect();
    try {
      // Extensions must be loaded and members attached before the query runs.
      for (const setup of setupStatements) {
        await session.run(setup);
      }

      const result = await session.runAndReadAll(boundedSql);
      const rows = toJsonSafeRows(normalizeQueryRows(result.getRows()));
      const headers = result.columnNames();
      const rowTotal = rows.length;

      return {
        headers,
        rows,
        totalRows: rowTotal,
        command: 'SELECT',
        rowCount: rowTotal,
      };
    } finally {
      session.closeSync();
    }
  } finally {
    db.closeSync();
  }
}
|
||||
|
|
@ -1,8 +1,6 @@
|
|||
import mysql, { type FieldPacket, type Pool, type RowDataPacket } from 'mysql2/promise';
|
||||
import { readFileSync } from 'node:fs';
|
||||
import { homedir } from 'node:os';
|
||||
import { resolve } from 'node:path';
|
||||
import { getDialectForDriver } from '../../context/connections/dialects.js';
|
||||
import { resolveStringReference } from '../shared/string-reference.js';
|
||||
import { assertReadOnlySql, limitSqlForExecution } from '../../context/connections/read-only-sql.js';
|
||||
import {
|
||||
constraintDiscoveryWarning,
|
||||
|
|
@ -11,7 +9,9 @@ import {
|
|||
} from '../../context/scan/constraint-discovery.js';
|
||||
import { scopedTableNames } from '../../context/scan/table-ref.js';
|
||||
import {
|
||||
connectorTestFailure,
|
||||
createKtxConnectorCapabilities,
|
||||
type KtxConnectorTestResult,
|
||||
type KtxColumnSampleInput,
|
||||
type KtxColumnSampleResult,
|
||||
type KtxColumnStatsInput,
|
||||
|
|
@ -157,6 +157,15 @@ interface MysqlDistinctValueRow extends RowDataPacket {
|
|||
val: unknown;
|
||||
}
|
||||
|
||||
interface MysqlStatsRow extends RowDataPacket {
|
||||
column_name: string;
|
||||
estimated_cardinality: number | null;
|
||||
}
|
||||
|
||||
export interface KtxMysqlColumnStatisticsResult {
|
||||
cardinalityByColumn: Map<string, number>;
|
||||
}
|
||||
|
||||
class DefaultMysqlPoolFactory implements KtxMysqlPoolFactory {
|
||||
createPool(config: KtxMysqlPoolConfig): KtxMysqlPool {
|
||||
return mysql.createPool(config) as Pool;
|
||||
|
|
@ -172,19 +181,6 @@ function stringConfigValue(
|
|||
return typeof value === 'string' && value.trim().length > 0 ? resolveStringReference(value.trim(), env) : undefined;
|
||||
}
|
||||
|
||||
function resolveStringReference(value: string, env: NodeJS.ProcessEnv): string {
|
||||
if (value.startsWith('env:')) {
|
||||
const envName = value.slice('env:'.length);
|
||||
return env[envName] ?? '';
|
||||
}
|
||||
if (value.startsWith('file:')) {
|
||||
const rawPath = value.slice('file:'.length);
|
||||
const path = rawPath.startsWith('~') ? resolve(homedir(), rawPath.slice(1)) : rawPath;
|
||||
return readFileSync(path, 'utf-8').trim();
|
||||
}
|
||||
return value;
|
||||
}
|
||||
|
||||
function maybeNumber(value: unknown): number | undefined {
|
||||
return typeof value === 'number' && Number.isFinite(value) ? value : undefined;
|
||||
}
|
||||
|
|
@ -382,7 +378,7 @@ export class KtxMysqlScanConnector implements KtxScanConnector {
|
|||
readonly capabilities = createKtxConnectorCapabilities({
|
||||
tableSampling: true,
|
||||
columnSampling: true,
|
||||
columnStats: false,
|
||||
columnStats: true,
|
||||
readOnlySql: true,
|
||||
nestedAnalysis: true,
|
||||
formalForeignKeys: true,
|
||||
|
|
@ -413,12 +409,12 @@ export class KtxMysqlScanConnector implements KtxScanConnector {
|
|||
this.id = `mysql:${options.connectionId}`;
|
||||
}
|
||||
|
||||
async testConnection(): Promise<{ success: boolean; error?: string }> {
|
||||
async testConnection(): Promise<KtxConnectorTestResult> {
|
||||
try {
|
||||
await this.query('SELECT 1');
|
||||
return { success: true };
|
||||
} catch (error) {
|
||||
return { success: false, error: error instanceof Error ? error.message : String(error) };
|
||||
return connectorTestFailure(error);
|
||||
}
|
||||
}
|
||||
|
||||
|
|
@ -560,8 +556,29 @@ export class KtxMysqlScanConnector implements KtxScanConnector {
|
|||
return { values, nullCount: null, distinctCount: null };
|
||||
}
|
||||
|
||||
async columnStats(_input: KtxColumnStatsInput, _ctx: KtxScanContext): Promise<KtxColumnStatsResult | null> {
|
||||
return null;
|
||||
async columnStats(input: KtxColumnStatsInput, _ctx: KtxScanContext): Promise<KtxColumnStatsResult | null> {
|
||||
const stats = await this.getColumnStatistics(input.table);
|
||||
const value = stats?.cardinalityByColumn.get(input.column);
|
||||
return value === undefined
|
||||
? null
|
||||
: { min: null, max: null, average: null, nullCount: null, distinctCount: value };
|
||||
}
|
||||
|
||||
/**
 * Looks up estimated per-column cardinalities for a table.
 *
 * The schema is the table's `db`, falling back to the pool's configured
 * database. Rows whose cardinality does not convert to a finite,
 * non-negative number are ignored.
 *
 * @param table - Table to look up.
 * @returns Cardinality by column name, or `null` when the dialect provides no
 *   statistics query or no usable rows come back.
 */
async getColumnStatistics(table: KtxTableRef): Promise<KtxMysqlColumnStatisticsResult | null> {
  const schemaName = table.db ?? this.poolConfig.database;
  const statsSql = this.dialect.generateColumnStatisticsQuery(schemaName, table.name);
  if (!statsSql) {
    return null;
  }

  const statsRows = await this.queryRaw<MysqlStatsRow>(statsSql);
  const cardinalityByColumn = new Map<string, number>();
  for (const statsRow of statsRows) {
    const estimate = Number(statsRow.estimated_cardinality);
    if (!Number.isFinite(estimate) || estimate < 0) {
      continue;
    }
    cardinalityByColumn.set(statsRow.column_name, estimate);
  }

  if (cardinalityByColumn.size === 0) {
    return null;
  }
  return { cardinalityByColumn };
}
|
||||
|
||||
async executeReadOnly(input: KtxMysqlReadOnlyQueryInput, _ctx: KtxScanContext): Promise<KtxQueryResult> {
|
||||
|
|
@ -762,7 +779,7 @@ export class KtxMysqlScanConnector implements KtxScanConnector {
|
|||
|
||||
private assertConnection(connectionId: string): void {
|
||||
if (connectionId !== this.connectionId) {
|
||||
throw new Error(`KTX MySQL connector ${this.id} cannot serve connection ${connectionId}`);
|
||||
throw new Error(`ktx MySQL connector ${this.id} cannot serve connection ${connectionId}`);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -171,8 +171,18 @@ export class KtxMysqlDialect implements KtxDialect {
|
|||
`;
|
||||
}
|
||||
|
||||
generateColumnStatisticsQuery(_schemaName: string, _tableName: string): string | null {
|
||||
return null;
|
||||
generateColumnStatisticsQuery(schemaName: string, tableName: string): string | null {
|
||||
return `
|
||||
SELECT
|
||||
COLUMN_NAME AS column_name,
|
||||
MAX(CARDINALITY) AS estimated_cardinality
|
||||
FROM INFORMATION_SCHEMA.STATISTICS
|
||||
WHERE TABLE_SCHEMA = '${schemaName.replace(/'/g, "''")}'
|
||||
AND TABLE_NAME = '${tableName.replace(/'/g, "''")}'
|
||||
AND CARDINALITY IS NOT NULL
|
||||
AND SEQ_IN_INDEX = 1
|
||||
GROUP BY COLUMN_NAME
|
||||
`;
|
||||
}
|
||||
|
||||
generateRandomizedCardinalitySampleQuery(tableName: string, columnName: string, sampleSize: number): string {
|
||||
|
|
|
|||
|
|
@ -1,12 +1,12 @@
|
|||
import { readFileSync } from 'node:fs';
|
||||
import { homedir } from 'node:os';
|
||||
import { resolve } from 'node:path';
|
||||
import { resolveStringReference } from '../shared/string-reference.js';
|
||||
import { getDialectForDriver } from '../../context/connections/dialects.js';
|
||||
import { assertReadOnlySql, limitSqlForExecution } from '../../context/connections/read-only-sql.js';
|
||||
import { tryConstraintQuery } from '../../context/scan/constraint-discovery.js';
|
||||
import { scopedTableNames } from '../../context/scan/table-ref.js';
|
||||
import {
|
||||
connectorTestFailure,
|
||||
createKtxConnectorCapabilities,
|
||||
type KtxConnectorTestResult,
|
||||
type KtxColumnSampleInput,
|
||||
type KtxColumnSampleResult,
|
||||
type KtxColumnStatsInput,
|
||||
|
|
@ -279,17 +279,6 @@ function stringConfigValue(
|
|||
return typeof value === 'string' && value.trim().length > 0 ? resolveStringReference(value.trim(), env) : undefined;
|
||||
}
|
||||
|
||||
function resolveStringReference(value: string, env: NodeJS.ProcessEnv): string {
|
||||
if (value.startsWith('env:')) {
|
||||
return env[value.slice('env:'.length)] ?? '';
|
||||
}
|
||||
if (value.startsWith('file:')) {
|
||||
const rawPath = value.slice('file:'.length);
|
||||
const path = rawPath.startsWith('~') ? resolve(homedir(), rawPath.slice(1)) : rawPath;
|
||||
return readFileSync(path, 'utf-8').trim();
|
||||
}
|
||||
return value;
|
||||
}
|
||||
|
||||
function numberValue(value: unknown): number | undefined {
|
||||
return typeof value === 'number' && Number.isFinite(value) ? value : undefined;
|
||||
|
|
@ -442,12 +431,12 @@ export class KtxPostgresScanConnector implements KtxScanConnector {
|
|||
this.id = `postgres:${options.connectionId}`;
|
||||
}
|
||||
|
||||
async testConnection(): Promise<{ success: boolean; error?: string }> {
|
||||
async testConnection(): Promise<KtxConnectorTestResult> {
|
||||
try {
|
||||
await this.query('SELECT 1');
|
||||
return { success: true };
|
||||
} catch (error) {
|
||||
return { success: false, error: error instanceof Error ? error.message : String(error) };
|
||||
return connectorTestFailure(error);
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
|||
20
packages/cli/src/connectors/shared/string-reference.ts
Normal file
20
packages/cli/src/connectors/shared/string-reference.ts
Normal file
|
|
@ -0,0 +1,20 @@
|
|||
import { readFileSync } from 'node:fs';
|
||||
import { homedir } from 'node:os';
|
||||
import { resolve } from 'node:path';
|
||||
|
||||
/**
 * Resolves a config string that may point at an environment variable or a file.
 *
 * - `env:NAME` yields `env[NAME]`, or an empty string when it is not set.
 * - `file:PATH` yields the trimmed UTF-8 contents of the file. A leading `~`
 *   (optionally followed by `/`) is replaced by the home directory.
 * - Any other value is returned as-is.
 *
 * @param value - Raw config value.
 * @param env - Environment used for `env:` lookups.
 * @returns The resolved string.
 */
export function resolveStringReference(value: string, env: NodeJS.ProcessEnv): string {
  const envPrefix = 'env:';
  const filePrefix = 'file:';

  if (value.startsWith(envPrefix)) {
    const variableName = value.slice(envPrefix.length);
    return env[variableName] ?? '';
  }

  if (!value.startsWith(filePrefix)) {
    return value;
  }

  const rawPath = value.slice(filePrefix.length);
  let filePath = rawPath;
  if (rawPath.startsWith('~')) {
    // Drop "~/" (or just "~") so the remainder resolves relative to the home dir
    // instead of being treated as an absolute path.
    const skip = rawPath[1] === '/' ? 2 : 1;
    filePath = resolve(homedir(), rawPath.slice(skip));
  }
  return readFileSync(filePath, 'utf-8').trim();
}
|
||||
|
|
@ -1,13 +1,13 @@
|
|||
import { createPrivateKey } from 'node:crypto';
|
||||
import { readFileSync } from 'node:fs';
|
||||
import { homedir } from 'node:os';
|
||||
import { resolve } from 'node:path';
|
||||
import { getDialectForDriver } from '../../context/connections/dialects.js';
|
||||
import { resolveStringReference } from '../shared/string-reference.js';
|
||||
import { assertReadOnlySql, limitSqlForExecution } from '../../context/connections/read-only-sql.js';
|
||||
import { tryConstraintQuery } from '../../context/scan/constraint-discovery.js';
|
||||
import { scopedTableNames } from '../../context/scan/table-ref.js';
|
||||
import {
|
||||
connectorTestFailure,
|
||||
createKtxConnectorCapabilities,
|
||||
type KtxConnectorTestResult,
|
||||
type KtxColumnSampleInput,
|
||||
type KtxColumnSampleResult,
|
||||
type KtxColumnStatsInput,
|
||||
|
|
@ -105,7 +105,7 @@ export interface KtxSnowflakeScanConnectorOptions {
|
|||
connectionId: string;
|
||||
connection: KtxSnowflakeConnectionConfig | undefined;
|
||||
/**
|
||||
* KTX project directory. When provided, snowflake-sdk's logger is redirected to
|
||||
* ktx project directory. When provided, snowflake-sdk's logger is redirected to
|
||||
* `<projectDir>/.ktx/logs/snowflake.log` so its JSON output does not bleed into
|
||||
* the CLI's TTY. Tests that use a fake driverFactory can leave this undefined.
|
||||
*/
|
||||
|
|
@ -133,18 +133,6 @@ export interface KtxSnowflakeColumnDistinctValuesResult {
|
|||
|
||||
const DATE_TYPES = ['DATE', 'TIMESTAMP', 'TIMESTAMP_LTZ', 'TIMESTAMP_NTZ', 'TIMESTAMP_TZ', 'TIME'];
|
||||
|
||||
function resolveStringReference(value: string, env: NodeJS.ProcessEnv): string {
|
||||
if (value.startsWith('env:')) {
|
||||
return env[value.slice('env:'.length)] ?? '';
|
||||
}
|
||||
if (value.startsWith('file:')) {
|
||||
const rawPath = value.slice('file:'.length);
|
||||
const path = rawPath.startsWith('~') ? resolve(homedir(), rawPath.slice(1)) : rawPath;
|
||||
return readFileSync(path, 'utf-8').trim();
|
||||
}
|
||||
return value;
|
||||
}
|
||||
|
||||
function stringConfigValue(
|
||||
connection: KtxSnowflakeConnectionConfig | undefined,
|
||||
key: keyof KtxSnowflakeConnectionConfig,
|
||||
|
|
@ -464,7 +452,7 @@ class SnowflakeSdkDriver implements KtxSnowflakeDriver {
|
|||
await this.query('SELECT 1');
|
||||
return { success: true };
|
||||
} catch (error) {
|
||||
return { success: false, error: error instanceof Error ? error.message : String(error) };
|
||||
return connectorTestFailure(error);
|
||||
}
|
||||
}
|
||||
|
||||
|
|
@ -573,7 +561,7 @@ export class KtxSnowflakeScanConnector implements KtxScanConnector {
|
|||
}
|
||||
}
|
||||
|
||||
async testConnection(): Promise<{ success: boolean; error?: string }> {
|
||||
async testConnection(): Promise<KtxConnectorTestResult> {
|
||||
return this.getDriver().test();
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -6,7 +6,7 @@ import { fileURLToPath } from 'node:url';
|
|||
import { getDialectForDriver } from '../../context/connections/dialects.js';
|
||||
import { assertReadOnlySql, limitSqlForExecution } from '../../context/connections/read-only-sql.js';
|
||||
import { normalizeQueryRows } from '../../context/connections/query-executor.js';
|
||||
import { createKtxConnectorCapabilities, type KtxColumnSampleInput, type KtxColumnSampleResult, type KtxColumnStatsInput, type KtxColumnStatsResult, type KtxQueryResult, type KtxReadOnlyQueryInput, type KtxScanConnector, type KtxScanContext, type KtxScanInput, type KtxSchemaForeignKey, type KtxSchemaSnapshot, type KtxSchemaTable, type KtxTableListEntry, type KtxTableRef, type KtxTableSampleInput, type KtxTableSampleResult } from '../../context/scan/types.js';
|
||||
import { connectorTestFailure, createKtxConnectorCapabilities, type KtxConnectorTestResult, type KtxColumnSampleInput, type KtxColumnSampleResult, type KtxColumnStatsInput, type KtxColumnStatsResult, type KtxQueryResult, type KtxReadOnlyQueryInput, type KtxScanConnector, type KtxScanContext, type KtxScanInput, type KtxSchemaForeignKey, type KtxSchemaSnapshot, type KtxSchemaTable, type KtxTableListEntry, type KtxTableRef, type KtxTableSampleInput, type KtxTableSampleResult } from '../../context/scan/types.js';
|
||||
import { scopedTableNames } from '../../context/scan/table-ref.js';
|
||||
|
||||
export interface KtxSqliteConnectionConfig {
|
||||
|
|
@ -97,30 +97,6 @@ function sqlitePathFromUrl(url: string): string {
|
|||
return url;
|
||||
}
|
||||
|
||||
function stripLeadingSqlComments(sql: string): string {
|
||||
let index = 0;
|
||||
while (index < sql.length) {
|
||||
while (/\s/.test(sql[index] ?? '')) {
|
||||
index += 1;
|
||||
}
|
||||
if (sql.startsWith('--', index)) {
|
||||
const end = sql.indexOf('\n', index + 2);
|
||||
index = end === -1 ? sql.length : end + 1;
|
||||
continue;
|
||||
}
|
||||
if (sql.startsWith('/*', index)) {
|
||||
const end = sql.indexOf('*/', index + 2);
|
||||
if (end === -1) {
|
||||
return sql.slice(index);
|
||||
}
|
||||
index = end + 2;
|
||||
continue;
|
||||
}
|
||||
break;
|
||||
}
|
||||
return sql.slice(index);
|
||||
}
|
||||
|
||||
export function isKtxSqliteConnectionConfig(
|
||||
connection: KtxSqliteConnectionConfig | undefined,
|
||||
): connection is KtxSqliteConnectionConfig {
|
||||
|
|
@ -167,7 +143,7 @@ export class KtxSqliteScanConnector implements KtxScanConnector {
|
|||
this.id = `sqlite:${options.connectionId}`;
|
||||
}
|
||||
|
||||
async testConnection(): Promise<{ success: boolean; error?: string }> {
|
||||
async testConnection(): Promise<KtxConnectorTestResult> {
|
||||
try {
|
||||
if (!existsSync(this.dbPath) || !statSync(this.dbPath).isFile()) {
|
||||
return { success: false, error: `File not found: ${this.dbPath}` };
|
||||
|
|
@ -175,7 +151,7 @@ export class KtxSqliteScanConnector implements KtxScanConnector {
|
|||
this.database().prepare('SELECT 1').get();
|
||||
return { success: true };
|
||||
} catch (error) {
|
||||
return { success: false, error: error instanceof Error ? error.message : String(error) };
|
||||
return connectorTestFailure(error);
|
||||
}
|
||||
}
|
||||
|
||||
|
|
@ -255,7 +231,7 @@ export class KtxSqliteScanConnector implements KtxScanConnector {
|
|||
|
||||
async executeReadOnly(input: KtxSqliteReadOnlyQueryInput, _ctx: KtxScanContext): Promise<KtxQueryResult> {
|
||||
this.assertConnection(input.connectionId);
|
||||
const result = this.query(limitSqlForExecution(stripLeadingSqlComments(input.sql), input.maxRows), input.params);
|
||||
const result = this.query(limitSqlForExecution(input.sql, input.maxRows), input.params);
|
||||
return { ...result, rowCount: result.rows.length };
|
||||
}
|
||||
|
||||
|
|
@ -379,7 +355,7 @@ export class KtxSqliteScanConnector implements KtxScanConnector {
|
|||
|
||||
private assertConnection(connectionId: string): void {
|
||||
if (connectionId !== this.connectionId) {
|
||||
throw new Error(`KTX SQLite connector ${this.id} cannot serve connection ${connectionId}`);
|
||||
throw new Error(`ktx SQLite connector ${this.id} cannot serve connection ${connectionId}`);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -1,9 +1,11 @@
|
|||
import { assertReadOnlySql } from '../../context/connections/read-only-sql.js';
|
||||
import { assertReadOnlySql, hoistLeadingCte, stripTrailingSqlNoise } from '../../context/connections/read-only-sql.js';
|
||||
import { getDialectForDriver } from '../../context/connections/dialects.js';
|
||||
import { tryConstraintQuery } from '../../context/scan/constraint-discovery.js';
|
||||
import { scopedTableNames } from '../../context/scan/table-ref.js';
|
||||
import {
|
||||
connectorTestFailure,
|
||||
createKtxConnectorCapabilities,
|
||||
type KtxConnectorTestResult,
|
||||
type KtxColumnSampleInput,
|
||||
type KtxColumnSampleResult,
|
||||
type KtxColumnStatsInput,
|
||||
|
|
@ -23,10 +25,8 @@ import {
|
|||
type KtxTableSampleInput,
|
||||
type KtxTableSampleResult,
|
||||
} from '../../context/scan/types.js';
|
||||
import { readFileSync } from 'node:fs';
|
||||
import { homedir } from 'node:os';
|
||||
import { resolve } from 'node:path';
|
||||
import sql from 'mssql';
|
||||
import { resolveStringReference } from '../shared/string-reference.js';
|
||||
|
||||
export interface KtxSqlServerConnectionConfig {
|
||||
driver?: string;
|
||||
|
|
@ -206,18 +206,6 @@ function stringConfigValue(
|
|||
return typeof value === 'string' && value.trim().length > 0 ? resolveStringReference(value.trim(), env) : undefined;
|
||||
}
|
||||
|
||||
function resolveStringReference(value: string, env: NodeJS.ProcessEnv): string {
|
||||
if (value.startsWith('env:')) {
|
||||
return env[value.slice('env:'.length)] ?? '';
|
||||
}
|
||||
if (value.startsWith('file:')) {
|
||||
const rawPath = value.slice('file:'.length);
|
||||
const path = rawPath.startsWith('~') ? resolve(homedir(), rawPath.slice(1)) : rawPath;
|
||||
return readFileSync(path, 'utf-8').trim();
|
||||
}
|
||||
return value;
|
||||
}
|
||||
|
||||
function parseSqlServerUrl(url: string): Partial<KtxSqlServerConnectionConfig> {
|
||||
const parsed = new URL(url);
|
||||
return {
|
||||
|
|
@ -282,14 +270,15 @@ function isDeniedError(error: unknown): boolean {
|
|||
}
|
||||
|
||||
function limitSqlForSqlServerExecution(sqlText: string, maxRows: number | undefined): string {
|
||||
const trimmed = assertReadOnlySql(sqlText).replace(/;+\s*$/, '');
|
||||
const trimmed = stripTrailingSqlNoise(assertReadOnlySql(sqlText));
|
||||
if (!maxRows) {
|
||||
return trimmed;
|
||||
}
|
||||
if (!Number.isInteger(maxRows) || maxRows <= 0) {
|
||||
throw new Error('maxRows must be a positive integer.');
|
||||
}
|
||||
return `SELECT TOP ${maxRows} * FROM (${trimmed}) AS ktx_query_result`;
|
||||
const { withPrefix, body } = hoistLeadingCte(trimmed);
|
||||
return `${withPrefix}SELECT TOP ${maxRows} * FROM (${body}) AS ktx_query_result`;
|
||||
}
|
||||
|
||||
export function isKtxSqlServerConnectionConfig(
|
||||
|
|
@ -384,12 +373,12 @@ export class KtxSqlServerScanConnector implements KtxScanConnector {
|
|||
this.id = `sqlserver:${options.connectionId}`;
|
||||
}
|
||||
|
||||
async testConnection(): Promise<{ success: boolean; error?: string }> {
|
||||
async testConnection(): Promise<KtxConnectorTestResult> {
|
||||
try {
|
||||
await this.query('SELECT 1');
|
||||
return { success: true };
|
||||
} catch (error) {
|
||||
return { success: false, error: error instanceof Error ? error.message : String(error) };
|
||||
return connectorTestFailure(error);
|
||||
}
|
||||
}
|
||||
|
||||
|
|
@ -831,7 +820,7 @@ export class KtxSqlServerScanConnector implements KtxScanConnector {
|
|||
|
||||
private assertConnection(connectionId: string): void {
|
||||
if (connectionId !== this.connectionId) {
|
||||
throw new Error(`KTX SQL Server connector ${this.id} cannot serve connection ${connectionId}`);
|
||||
throw new Error(`ktx SQL Server connector ${this.id} cannot serve connection ${connectionId}`);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -12,6 +12,13 @@ import { buildPublicIngestPlan, executePublicIngestTarget, publicProgressMessage
|
|||
import { createAggregateProgressPort } from './progress-port-adapter.js';
|
||||
import { formatDuration } from './demo-metrics.js';
|
||||
import { profileMark } from './startup-profile.js';
|
||||
import {
|
||||
isFreshStarCountCache,
|
||||
readStarCountCache,
|
||||
writeStarCountCache,
|
||||
} from './star-prompt/cache.js';
|
||||
import { fetchGitHubStarCount as defaultFetchGitHubStarCount } from './star-prompt/star-count.js';
|
||||
import { renderStarPromptLine } from './star-prompt/star-line.js';
|
||||
|
||||
profileMark('module:context-build-view');
|
||||
|
||||
|
|
@ -79,6 +86,7 @@ export interface ContextBuildViewState {
|
|||
frame: number;
|
||||
startedAt: number | null;
|
||||
totalElapsedMs: number;
|
||||
starCount: number | null;
|
||||
}
|
||||
|
||||
export interface ContextBuildArgs {
|
||||
|
|
@ -121,6 +129,8 @@ interface CompletedItemName {
|
|||
interface ContextBuildRenderOptions {
|
||||
styled?: boolean;
|
||||
showHint?: boolean;
|
||||
showStarPrompt?: boolean;
|
||||
columns?: number;
|
||||
hintText?: string;
|
||||
projectDir?: string;
|
||||
title?: string;
|
||||
|
|
@ -138,6 +148,15 @@ export interface ContextBuildDeps {
|
|||
now?: () => number;
|
||||
onSourceProgress?: (sources: ContextBuildSourceProgressUpdate[]) => void;
|
||||
sourceProgressThrottleMs?: number;
|
||||
fetchStarCount?: typeof defaultFetchGitHubStarCount;
|
||||
starPromptEnv?: StarPromptEnv;
|
||||
starPromptHomeDir?: string;
|
||||
}
|
||||
|
||||
/**
 * Process environment narrowed to the variables consulted when deciding
 * whether the star prompt should be hidden.
 */
interface StarPromptEnv extends NodeJS.ProcessEnv {
|
||||
CI?: string;
|
||||
DO_NOT_TRACK?: string;
|
||||
KTX_NO_STAR?: string;
|
||||
}
|
||||
|
||||
// --- Rendering ---
|
||||
|
|
@ -396,7 +415,7 @@ export function renderContextBuildView(
|
|||
const hasActive = allTargets.some((t) => t.status === 'running' || t.status === 'queued');
|
||||
const allDone = totalCount > 0 && !hasActive;
|
||||
|
||||
const headerParts = [options.title ?? 'Building KTX context'];
|
||||
const headerParts = [options.title ?? 'Building ktx context'];
|
||||
if (totalCount > 0) {
|
||||
const progressParts: string[] = [`${doneCount}/${totalCount}`];
|
||||
if (state.totalElapsedMs > 0) progressParts.push(formatDuration(state.totalElapsedMs));
|
||||
|
|
@ -427,6 +446,14 @@ export function renderContextBuildView(
|
|||
lines.push('');
|
||||
}
|
||||
|
||||
if (options.showStarPrompt && hasActive) {
|
||||
const starPrompt = renderStarPromptLine({
|
||||
count: state.starCount,
|
||||
columns: options.columns ?? 80,
|
||||
});
|
||||
lines.push(styled ? dim(starPrompt) : starPrompt);
|
||||
}
|
||||
|
||||
if (options.showHint && hasActive) {
|
||||
const hintContent = options.hintText ?? 'Ctrl+C to stop';
|
||||
const hint = ` ${hintContent}`;
|
||||
|
|
@ -584,6 +611,7 @@ export function viewStateFromSourceProgress(
|
|||
frame: 0,
|
||||
startedAt: startedAtMs ?? null,
|
||||
totalElapsedMs: startedAtMs ? now - startedAtMs : 0,
|
||||
starCount: null,
|
||||
};
|
||||
}
|
||||
|
||||
|
|
@ -631,6 +659,9 @@ export function createRepainter(io: KtxCliIo) {
|
|||
hasPainted = true;
|
||||
lastCursorUpRows = cursorUpRowsAfterWrite(content);
|
||||
},
|
||||
columns() {
|
||||
return terminalColumns();
|
||||
},
|
||||
};
|
||||
}
|
||||
|
||||
|
|
@ -707,7 +738,7 @@ function failedStepDetail(result: KtxPublicIngestTargetResult): string | null {
|
|||
const INTERNAL_FAILURE_LINE_RE =
|
||||
/^(Report|Run|Job|Status|Adapter|Connection|Sync|Mode|Dry run|Diff|Tasks|Work units|Failed tasks|Saved memory|Provenance rows):\s*/;
|
||||
const ACTIONABLE_FAILURE_LINE_RE =
|
||||
/^(Missing bundled Python runtime manifest|KTX Python runtime is required|KTX daemon HTTP|Error:|Failed\b|Could not\b|Cannot\b)/;
|
||||
/^(Missing bundled Python runtime manifest|ktx Python runtime is required|ktx daemon HTTP|Error:|Failed\b|Could not\b|Cannot\b)/;
|
||||
|
||||
function trimErrorPrefix(line: string): string {
|
||||
return line.replace(/^Error:\s*/, '');
|
||||
|
|
@ -718,7 +749,7 @@ function firstCapturedFailureLine(output: string | undefined): string | null {
|
|||
.split(/\r?\n/)
|
||||
.map((candidate) => candidate.trim())
|
||||
.filter((candidate) => candidate.length > 0)
|
||||
.filter((candidate) => !candidate.startsWith('KTX scan completed'))
|
||||
.filter((candidate) => !candidate.startsWith('ktx scan completed'))
|
||||
.filter((candidate) => !INTERNAL_FAILURE_LINE_RE.test(candidate));
|
||||
const line = lines.find((candidate) => ACTIONABLE_FAILURE_LINE_RE.test(candidate)) ?? lines.at(-1) ?? null;
|
||||
return line ? trimErrorPrefix(line) : null;
|
||||
|
|
@ -758,7 +789,7 @@ function failureTextForTarget(input: {
|
|||
const code = networkErrorCode(input.error, input.capturedOutput);
|
||||
if (code && isLocalSqlAnalysisConnectionRefused({ capturedOutput: input.capturedOutput, fallback: input.fallback })) {
|
||||
return [
|
||||
`KTX could not reach the local SQL analysis runtime while processing query history for ${input.target.connectionId}.`,
|
||||
`ktx could not reach the local SQL analysis runtime while processing query history for ${input.target.connectionId}.`,
|
||||
`Reason: ${NETWORK_ERROR_REASONS[code]} (${code}).`,
|
||||
`Retry: ${retryCommand({
|
||||
projectDir: input.projectDir,
|
||||
|
|
@ -772,7 +803,7 @@ function failureTextForTarget(input: {
|
|||
if (code) {
|
||||
const operation = input.target.operation === 'database-ingest' ? 'reading schema for' : 'ingesting';
|
||||
return [
|
||||
`KTX lost its connection to ${friendlyDriverName(input.target.driver)} while ${operation} ${input.target.connectionId}.`,
|
||||
`ktx lost its connection to ${friendlyDriverName(input.target.driver)} while ${operation} ${input.target.connectionId}.`,
|
||||
`Reason: ${NETWORK_ERROR_REASONS[code]} (${code}).`,
|
||||
`Retry: ${retryCommand({
|
||||
projectDir: input.projectDir,
|
||||
|
|
@ -806,6 +837,7 @@ export function initViewState(targets: KtxPublicIngestPlanTarget[]): ContextBuil
|
|||
frame: 0,
|
||||
startedAt: null,
|
||||
totalElapsedMs: 0,
|
||||
starCount: null,
|
||||
};
|
||||
}
|
||||
|
||||
|
|
@ -817,6 +849,50 @@ function formatProgressDetail(
|
|||
return `[${percent}%] ${publicProgressMessage(update.message, target)}`;
|
||||
}
|
||||
|
||||
const STAR_COUNT_CACHE_TTL_MS = 24 * 60 * 60 * 1000;
|
||||
|
||||
/**
 * Interprets an environment variable as a boolean flag.
 *
 * The value is trimmed and compared case-insensitively, so `FALSE`, `False`
 * and ` 0 ` count as "off" just like `false` and `0`. Unset, empty, and
 * whitespace-only values are also "off"; anything else is "on".
 *
 * @param value Raw environment value, or `undefined` when the variable is unset.
 * @returns `true` when the variable is set to something other than an "off" value.
 */
function envFlag(value: string | undefined): boolean {
  if (value === undefined) {
    return false;
  }
  const normalized = value.trim().toLowerCase();
  return normalized !== '' && normalized !== '0' && normalized !== 'false';
}
|
||||
|
||||
/**
 * Reports whether any of the opt-out variables (`CI`, `DO_NOT_TRACK`,
 * `KTX_NO_STAR`) is switched on in the given environment.
 */
function shouldSuppressStarPrompt(env: StarPromptEnv): boolean {
  const optOutValues = [env.CI, env.DO_NOT_TRACK, env.KTX_NO_STAR];
  return optOutValues.some((value) => envFlag(value));
}
|
||||
|
||||
/**
 * Seeds `state.starCount` from the on-disk cache and, when that cache is
 * missing or older than `STAR_COUNT_CACHE_TTL_MS`, refreshes it in the
 * background.
 *
 * The refresh is best-effort: the returned promise is intentionally not
 * awaited, and every failure in the chain is swallowed so the star prompt can
 * never break the build.
 *
 * @param input.fetchStarCount Fetches the current star count.
 * @param input.homeDir Optional home directory passed through to the cache helpers.
 * @param input.now Clock used for the freshness check and the `fetchedAt` stamp.
 * @param input.paint Repaints the view after a fresh count arrives.
 * @param input.state View state whose `starCount` is updated in place.
 */
function startStarPromptCountRefresh(input: {
  fetchStarCount: typeof defaultFetchGitHubStarCount;
  homeDir?: string;
  now: () => number;
  paint: () => void;
  state: ContextBuildViewState;
}): void {
  const cached = readStarCountCache({ homeDir: input.homeDir });
  if (cached) {
    input.state.starCount = cached.count;
  }

  if (isFreshStarCountCache(cached, new Date(input.now()), STAR_COUNT_CACHE_TTL_MS)) {
    return;
  }

  void input
    .fetchStarCount()
    .then((count) => {
      // Ignore anything that is not a usable number (e.g. a failed lookup).
      if (typeof count !== 'number' || !Number.isFinite(count)) {
        return undefined;
      }
      input.state.starCount = count;
      input.paint();
      // Return the write so that, if it yields a promise, a rejection is routed
      // into the `.catch` below instead of becoming an unhandled rejection.
      return writeStarCountCache(
        {
          count,
          fetchedAt: new Date(input.now()).toISOString(),
        },
        { homeDir: input.homeDir },
      );
    })
    .catch(() => undefined);
}
|
||||
|
||||
export async function runContextBuild(
|
||||
project: KtxPublicIngestProject,
|
||||
args: ContextBuildArgs,
|
||||
|
|
@ -838,13 +914,31 @@ export async function runContextBuild(
|
|||
state.startedAt = nowFn();
|
||||
|
||||
const repainter = isTTY ? createRepainter(io) : null;
|
||||
const starPromptEnabled = repainter !== null && !shouldSuppressStarPrompt(deps.starPromptEnv ?? process.env);
|
||||
const viewOpts = {
|
||||
styled: true,
|
||||
projectDir: args.projectDir,
|
||||
notices: plan.notices ?? [],
|
||||
warnings: plan.warnings,
|
||||
};
|
||||
const paint = (hint: boolean) => repainter?.paint(renderContextBuildView(state, { ...viewOpts, showHint: hint }));
|
||||
const paint = (hint: boolean) =>
|
||||
repainter?.paint(
|
||||
renderContextBuildView(state, {
|
||||
...viewOpts,
|
||||
showHint: hint,
|
||||
showStarPrompt: starPromptEnabled && hint,
|
||||
columns: repainter.columns(),
|
||||
}),
|
||||
);
|
||||
if (starPromptEnabled) {
|
||||
startStarPromptCountRefresh({
|
||||
fetchStarCount: deps.fetchStarCount ?? defaultFetchGitHubStarCount,
|
||||
homeDir: deps.starPromptHomeDir,
|
||||
now: nowFn,
|
||||
paint: () => paint(true),
|
||||
state,
|
||||
});
|
||||
}
|
||||
paint(true);
|
||||
|
||||
let spinnerInterval: ReturnType<typeof setInterval> | null = null;
|
||||
|
|
|
|||
|
|
@ -17,7 +17,6 @@ export interface KtxDriverRegistration {
|
|||
readonly driver: KtxConnectionDriver;
|
||||
readonly scopeConfigKey: KtxScopeConfigKey | null;
|
||||
readonly hasHistoricSqlReader: boolean;
|
||||
readonly hasLocalQueryExecutor: boolean;
|
||||
load(): Promise<KtxDriverConnectorModule>;
|
||||
}
|
||||
|
||||
|
|
@ -31,7 +30,6 @@ export const driverRegistrations: Record<KtxConnectionDriver, KtxDriverRegistrat
|
|||
driver: 'bigquery',
|
||||
scopeConfigKey: 'dataset_ids',
|
||||
hasHistoricSqlReader: true,
|
||||
hasLocalQueryExecutor: false,
|
||||
load: async () => {
|
||||
const m = await import('../../connectors/bigquery/connector.js');
|
||||
return {
|
||||
|
|
@ -53,7 +51,6 @@ export const driverRegistrations: Record<KtxConnectionDriver, KtxDriverRegistrat
|
|||
driver: 'clickhouse',
|
||||
scopeConfigKey: 'databases',
|
||||
hasHistoricSqlReader: false,
|
||||
hasLocalQueryExecutor: false,
|
||||
load: async () => {
|
||||
const m = await import('../../connectors/clickhouse/connector.js');
|
||||
return {
|
||||
|
|
@ -75,7 +72,6 @@ export const driverRegistrations: Record<KtxConnectionDriver, KtxDriverRegistrat
|
|||
driver: 'mysql',
|
||||
scopeConfigKey: 'schemas',
|
||||
hasHistoricSqlReader: false,
|
||||
hasLocalQueryExecutor: false,
|
||||
load: async () => {
|
||||
const m = await import('../../connectors/mysql/connector.js');
|
||||
return {
|
||||
|
|
@ -97,7 +93,6 @@ export const driverRegistrations: Record<KtxConnectionDriver, KtxDriverRegistrat
|
|||
driver: 'postgres',
|
||||
scopeConfigKey: 'schemas',
|
||||
hasHistoricSqlReader: true,
|
||||
hasLocalQueryExecutor: true,
|
||||
load: async () => {
|
||||
const m = await import('../../connectors/postgres/connector.js');
|
||||
return {
|
||||
|
|
@ -119,7 +114,6 @@ export const driverRegistrations: Record<KtxConnectionDriver, KtxDriverRegistrat
|
|||
driver: 'sqlite',
|
||||
scopeConfigKey: null,
|
||||
hasHistoricSqlReader: false,
|
||||
hasLocalQueryExecutor: true,
|
||||
load: async () => {
|
||||
const m = await import('../../connectors/sqlite/connector.js');
|
||||
return {
|
||||
|
|
@ -141,7 +135,6 @@ export const driverRegistrations: Record<KtxConnectionDriver, KtxDriverRegistrat
|
|||
driver: 'snowflake',
|
||||
scopeConfigKey: 'schema_names',
|
||||
hasHistoricSqlReader: true,
|
||||
hasLocalQueryExecutor: false,
|
||||
load: async () => {
|
||||
const m = await import('../../connectors/snowflake/connector.js');
|
||||
return {
|
||||
|
|
@ -163,7 +156,6 @@ export const driverRegistrations: Record<KtxConnectionDriver, KtxDriverRegistrat
|
|||
driver: 'sqlserver',
|
||||
scopeConfigKey: 'schemas',
|
||||
hasHistoricSqlReader: false,
|
||||
hasLocalQueryExecutor: false,
|
||||
load: async () => {
|
||||
const m = await import('../../connectors/sqlserver/connector.js');
|
||||
return {
|
||||
|
|
|
|||
83
packages/cli/src/context/connections/federation.ts
Normal file
83
packages/cli/src/context/connections/federation.ts
Normal file
|
|
@ -0,0 +1,83 @@
|
|||
import type { KtxProjectConnectionConfig } from '../project/config.js';
|
||||
|
||||
/** Stable id for the runtime-derived federated connection. Never written to ktx.yaml. */
|
||||
export const FEDERATED_CONNECTION_ID = '_ktx_federated';
|
||||
|
||||
/**
|
||||
* Drivers DuckDB can ATTACH for federation. The driver name doubles as the
|
||||
* DuckDB extension/TYPE name, so this set is the single source of truth for
|
||||
* both membership (a driver participates iff it appears here) and attach type.
|
||||
*/
|
||||
const ATTACH_COMPATIBLE_DRIVERS = new Set(['postgres', 'mysql', 'sqlite']);

/**
 * Maps a connection driver name to the DuckDB attach type.
 *
 * Matching is case-insensitive and the lowercased name is returned.
 *
 * @throws Error when the driver is not one DuckDB federation can attach.
 */
export function attachTypeForDriver(driver: string): string {
  const attachType = driver.toLowerCase();
  if (ATTACH_COMPATIBLE_DRIVERS.has(attachType)) {
    return attachType;
  }
  throw new Error(`Driver "${driver}" cannot be attached by DuckDB federation.`);
}
|
||||
|
||||
/** One declared connection that takes part in the federated connection. */
export interface FederatedMember {
|
||||
/** Key of the connection in the project's connection map. */
connectionId: string;
|
||||
/** Lowercased driver name of the member connection. */
driver: string;
|
||||
/** Project directory the member was derived for. */
projectDir: string;
|
||||
/** The member's declared connection config, unmodified. */
connection: KtxProjectConnectionConfig;
|
||||
}
|
||||
|
||||
/** Runtime description of the virtual federated connection and its members. */
export interface FederatedConnectionDescriptor {
|
||||
id: typeof FEDERATED_CONNECTION_ID;
|
||||
driver: 'duckdb';
|
||||
/** Attach-compatible connections that make up the federation. */
members: FederatedMember[];
|
||||
}
|
||||
|
||||
/**
 * Builds the virtual federated connection for a project.
 *
 * Every declared connection whose (case-insensitive) driver is
 * attach-compatible becomes a member. A descriptor is only produced when at
 * least two members exist; otherwise the result is `null`, leaving single-DB
 * and incompatible projects unaffected.
 *
 * @param connections Declared connections keyed by connection id.
 * @param projectDir Project directory recorded on every member.
 */
export function deriveFederatedConnection(
  connections: Record<string, KtxProjectConnectionConfig>,
  projectDir: string,
): FederatedConnectionDescriptor | null {
  const members: FederatedMember[] = [];
  for (const [connectionId, connection] of Object.entries(connections)) {
    const driver = connection.driver.toLowerCase();
    if (ATTACH_COMPATIBLE_DRIVERS.has(driver)) {
      members.push({ connectionId, driver, projectDir, connection });
    }
  }
  return members.length >= 2 ? { id: FEDERATED_CONNECTION_ID, driver: 'duckdb', members } : null;
}
|
||||
|
||||
/** Listing-facing summary of the virtual federated connection. */
export interface FederatedConnectionListing {
|
||||
id: typeof FEDERATED_CONNECTION_ID;
|
||||
driver: 'duckdb';
|
||||
/** Connection ids of the federation members. */
members: string[];
|
||||
/** Human-readable guidance on how to name tables in federated queries. */
hint: string;
|
||||
}
|
||||
|
||||
/**
 * Listing view of the virtual federated connection, as shown by
 * `ktx connection` and MCP `connection_list`.
 *
 * It is computed through deriveFederatedConnection, so it exists exactly when
 * the federated connection does and names the same members.
 *
 * @returns The listing, or `null` when no federated connection is derived.
 */
export function federatedConnectionListing(
  connections: Record<string, KtxProjectConnectionConfig>,
  projectDir: string,
): FederatedConnectionListing | null {
  const federated = deriveFederatedConnection(connections, projectDir);
  if (federated === null) {
    return null;
  }
  const memberIds = federated.members.map(({ connectionId }) => connectionId);
  return {
    id: FEDERATED_CONNECTION_ID,
    driver: 'duckdb',
    members: memberIds,
    hint: 'Cross-database queries run here. Name tables connectionId.schema.table (or connectionId.table for sqlite); double-quote any id that is not a bare SQL identifier, e.g. "books-db".public.books.',
  };
}
|
||||
87
packages/cli/src/context/connections/gdrive-config.ts
Normal file
87
packages/cli/src/context/connections/gdrive-config.ts
Normal file
|
|
@ -0,0 +1,87 @@
|
|||
import { readFile } from 'node:fs/promises';
|
||||
import { homedir } from 'node:os';
|
||||
import { resolve } from 'node:path';
|
||||
import type { KtxProjectConnectionConfig } from '../project/config.js';
|
||||
import type { GdrivePullConfig } from '../ingest/adapters/gdrive/types.js';
|
||||
import { gdrivePullConfigSchema } from '../ingest/adapters/gdrive/types.js';
|
||||
|
||||
// The gdrive member of the project connection config union, as declared.
type RawKtxGdriveConnectionConfig = Extract<KtxProjectConnectionConfig, { driver: 'gdrive' }>;
|
||||
|
||||
/**
 * Validated gdrive connection config: same as the raw config, except that the
 * key reference, folder id and recursive flag are always present.
 */
export type KtxGdriveConnectionConfig = Omit<
|
||||
RawKtxGdriveConnectionConfig,
|
||||
'service_account_key_ref' | 'folder_id' | 'recursive'
|
||||
> & {
|
||||
driver: 'gdrive';
|
||||
service_account_key_ref: string;
|
||||
folder_id: string;
|
||||
recursive: boolean;
|
||||
};
|
||||
|
||||
/** Options for resolving the service account key. */
interface ResolveKeyOptions {
|
||||
/** Replacement file reader; when omitted the key file is read from disk as UTF-8. */
readTextFile?: (path: string) => Promise<string>;
|
||||
}
|
||||
|
||||
/** Type guard for non-null, non-array objects. */
function isRecord(value: unknown): value is Record<string, unknown> {
  if (value === null || Array.isArray(value)) {
    return false;
  }
  return typeof value === 'object';
}
|
||||
|
||||
/**
 * Expands a leading `~` or `~/` to the current user's home directory.
 * Any other path (including `~user/...`) is returned unchanged.
 */
function expandHome(path: string): string {
  const isHomeRelative = path === '~' || path.startsWith('~/');
  if (!isHomeRelative) {
    return path;
  }
  // Drop the "~/" prefix; for a bare "~" this leaves an empty remainder.
  const remainder = path.slice(2);
  return resolve(homedir(), remainder);
}
|
||||
|
||||
/**
 * Validates a raw gdrive connection entry and returns its normalized form.
 *
 * String fields are trimmed, `service_account_key_ref` must be a `file:`
 * reference, and `recursive` is `true` only when the raw value is exactly
 * `true`.
 *
 * @throws Error when the input is not an object, has a different driver, or
 *   is missing/invalid in `service_account_key_ref` or `folder_id`.
 */
export function parseGdriveConnectionConfig(raw: unknown): KtxGdriveConnectionConfig {
  if (!isRecord(raw)) {
    throw new Error('gdrive connection config must be an object');
  }
  if (raw.driver !== 'gdrive') {
    throw new Error('gdrive connection config requires driver: gdrive');
  }

  const rawKeyRef = raw.service_account_key_ref; // pragma: allowlist secret
  const keyRef = typeof rawKeyRef === 'string' ? rawKeyRef.trim() : '';
  if (keyRef.length === 0) {
    throw new Error('gdrive connection config requires service_account_key_ref');
  }
  if (!keyRef.startsWith('file:')) {
    throw new Error('gdrive service_account_key_ref must use file:/path/to/key.json');
  }

  const rawFolderId = raw.folder_id;
  const folderId = typeof rawFolderId === 'string' ? rawFolderId.trim() : '';
  if (folderId.length === 0) {
    throw new Error('gdrive connection config requires folder_id');
  }

  return {
    driver: 'gdrive',
    service_account_key_ref: keyRef,
    folder_id: folderId,
    recursive: raw.recursive === true,
  };
}
|
||||
|
||||
/**
 * Reads the service account key that a `file:` reference points to.
 *
 * A leading `~` in the path is expanded to the home directory and the file
 * contents are trimmed before being returned.
 *
 * @param serviceAccountKeyRef Reference of the form `file:/path/to/key.json`.
 * @param options Optional replacement for the file reader.
 * @throws Error when the reference is not a `file:` reference or the file is
 *   empty after trimming.
 * @internal
 */
export async function resolveGdriveServiceAccountKey(
  serviceAccountKeyRef: string,
  options: ResolveKeyOptions = {},
): Promise<string> {
  const filePrefix = 'file:';
  if (!serviceAccountKeyRef.startsWith(filePrefix)) {
    throw new Error('gdrive service_account_key_ref must use file:/path/to/key.json');
  }

  const keyPath = expandHome(serviceAccountKeyRef.slice(filePrefix.length));
  const readTextFile = options.readTextFile ?? ((filePath: string) => readFile(filePath, 'utf-8'));
  const contents = await readTextFile(keyPath);
  const key = contents.trim();
  if (key.length === 0) {
    throw new Error(`gdrive service account key file is empty: ${keyPath}`);
  }
  return key;
}
|
||||
|
||||
/**
 * Converts a validated gdrive connection into the pull config used by the
 * gdrive ingest adapter, resolving the service account key on the way and
 * validating the result against `gdrivePullConfigSchema`.
 */
export async function gdriveConnectionToPullConfig(
  config: KtxGdriveConnectionConfig,
  options: ResolveKeyOptions = {},
): Promise<GdrivePullConfig> {
  const serviceAccountKey = await resolveGdriveServiceAccountKey(config.service_account_key_ref, options);
  const { folder_id: folderId, recursive } = config;
  return gdrivePullConfigSchema.parse({ serviceAccountKey, folderId, recursive });
}
|
||||
|
|
@ -1,59 +0,0 @@
|
|||
import { driverRegistrations, getDriverRegistration } from './drivers.js';
|
||||
import { createPostgresQueryExecutor } from './postgres-query-executor.js';
|
||||
import type {
|
||||
KtxSqlQueryExecutionInput,
|
||||
KtxSqlQueryExecutionResult,
|
||||
KtxSqlQueryExecutorPort,
|
||||
} from './query-executor.js';
|
||||
import { createSqliteQueryExecutor } from './sqlite-query-executor.js';
|
||||
import type { KtxConnectionDriver } from '../scan/types.js';
|
||||
|
||||
export interface DefaultLocalQueryExecutorOptions {
|
||||
postgres?: KtxSqlQueryExecutorPort;
|
||||
sqlite?: KtxSqlQueryExecutorPort;
|
||||
}
|
||||
|
||||
function driverFor(input: KtxSqlQueryExecutionInput): string {
|
||||
return String(input.connection?.driver ?? '').toLowerCase();
|
||||
}
|
||||
|
||||
function localExecutorMap(
|
||||
options: DefaultLocalQueryExecutorOptions,
|
||||
): Partial<Record<KtxConnectionDriver, KtxSqlQueryExecutorPort>> {
|
||||
const wiredExecutors: Partial<Record<KtxConnectionDriver, KtxSqlQueryExecutorPort>> = {
|
||||
postgres: options.postgres ?? createPostgresQueryExecutor(),
|
||||
sqlite: options.sqlite ?? createSqliteQueryExecutor(),
|
||||
};
|
||||
|
||||
const executors: Partial<Record<KtxConnectionDriver, KtxSqlQueryExecutorPort>> = {};
|
||||
for (const registration of Object.values(driverRegistrations)) {
|
||||
if (!registration.hasLocalQueryExecutor) continue;
|
||||
const executor = wiredExecutors[registration.driver];
|
||||
if (executor) {
|
||||
executors[registration.driver] = executor;
|
||||
}
|
||||
}
|
||||
return executors;
|
||||
}
|
||||
|
||||
export function createDefaultLocalQueryExecutor(options: DefaultLocalQueryExecutorOptions = {}): KtxSqlQueryExecutorPort {
|
||||
const executors = localExecutorMap(options);
|
||||
|
||||
return {
|
||||
async execute(input: KtxSqlQueryExecutionInput): Promise<KtxSqlQueryExecutionResult> {
|
||||
const driver = driverFor(input);
|
||||
const registration = getDriverRegistration(driver);
|
||||
if (!registration?.hasLocalQueryExecutor) {
|
||||
throw new Error(`No local query executor is configured for driver "${input.connection?.driver ?? 'unknown'}".`);
|
||||
}
|
||||
|
||||
const executor = executors[registration.driver];
|
||||
if (!executor) {
|
||||
throw new Error(
|
||||
`Local query executor flag is enabled for driver "${registration.driver}", but no executor factory is wired.`,
|
||||
);
|
||||
}
|
||||
return executor.execute(input);
|
||||
},
|
||||
};
|
||||
}
|
||||
|
|
@ -16,6 +16,8 @@ export interface LocalConnectionInfo {
|
|||
id: string;
|
||||
name: string;
|
||||
connectionType: string;
|
||||
members?: string[];
|
||||
hint?: string;
|
||||
}
|
||||
|
||||
const DRIVER_TO_CONNECTION_TYPE: Record<string, ConnectionType> = {
|
||||
|
|
|
|||
|
|
@ -1,78 +0,0 @@
|
|||
import { Client, type ClientConfig } from 'pg';
|
||||
import type {
|
||||
KtxSqlQueryExecutionInput,
|
||||
KtxSqlQueryExecutionResult,
|
||||
KtxSqlQueryExecutorPort,
|
||||
} from './query-executor.js';
|
||||
import { limitSqlForExecution } from './read-only-sql.js';
|
||||
|
||||
interface PgClientLike {
|
||||
connect(): Promise<unknown>;
|
||||
query(input: string | { text: string; rowMode: 'array' }): Promise<{
|
||||
fields: Array<{ name: string }>;
|
||||
rows: unknown[][];
|
||||
command: string;
|
||||
rowCount: number | null;
|
||||
}>;
|
||||
end(): Promise<void>;
|
||||
}
|
||||
|
||||
interface PostgresQueryExecutorOptions {
|
||||
statementTimeoutMs?: number;
|
||||
queryTimeoutMs?: number;
|
||||
connectionTimeoutMs?: number;
|
||||
clientFactory?: (config: ClientConfig) => PgClientLike;
|
||||
}
|
||||
|
||||
function connectionDriver(input: KtxSqlQueryExecutionInput): string {
|
||||
return String(input.connection?.driver ?? '').toLowerCase();
|
||||
}
|
||||
|
||||
function createDefaultClient(config: ClientConfig): PgClientLike {
|
||||
return new Client(config);
|
||||
}
|
||||
|
||||
export function createPostgresQueryExecutor(options: PostgresQueryExecutorOptions = {}): KtxSqlQueryExecutorPort {
|
||||
const clientFactory = options.clientFactory ?? createDefaultClient;
|
||||
return {
|
||||
async execute(input: KtxSqlQueryExecutionInput): Promise<KtxSqlQueryExecutionResult> {
|
||||
const driver = connectionDriver(input);
|
||||
const connection = input.connection;
|
||||
if (driver !== 'postgres') {
|
||||
throw new Error(`Local Postgres execution cannot run driver "${connection?.driver ?? 'unknown'}".`);
|
||||
}
|
||||
if (typeof connection?.url !== 'string' || connection.url.trim().length === 0) {
|
||||
throw new Error(`Local Postgres execution requires connections.${input.connectionId}.url.`);
|
||||
}
|
||||
|
||||
const client = clientFactory({
|
||||
connectionString: connection.url,
|
||||
statement_timeout: options.statementTimeoutMs ?? 30_000,
|
||||
query_timeout: options.queryTimeoutMs ?? 35_000,
|
||||
connectionTimeoutMillis: options.connectionTimeoutMs ?? 5_000,
|
||||
application_name: 'ktx-local-query',
|
||||
});
|
||||
await client.connect();
|
||||
try {
|
||||
await client.query('BEGIN READ ONLY');
|
||||
const result = await client.query({
|
||||
text: limitSqlForExecution(input.sql, input.maxRows),
|
||||
rowMode: 'array',
|
||||
});
|
||||
await client.query('COMMIT');
|
||||
return {
|
||||
headers: result.fields.map((field) => field.name),
|
||||
rows: result.rows,
|
||||
totalRows: result.rows.length,
|
||||
command: result.command,
|
||||
rowCount: result.rowCount,
|
||||
};
|
||||
} catch (error) {
|
||||
await client.query('ROLLBACK').catch(() => undefined);
|
||||
throw error;
|
||||
} finally {
|
||||
await client.end();
|
||||
}
|
||||
},
|
||||
};
|
||||
}
|
||||
58
packages/cli/src/context/connections/project-sql-executor.ts
Normal file
58
packages/cli/src/context/connections/project-sql-executor.ts
Normal file
|
|
@ -0,0 +1,58 @@
|
|||
import { executeFederatedQuery } from '../../connectors/duckdb/federated-executor.js';
|
||||
import type { KtxLocalProject } from '../project/project.js';
|
||||
import type { KtxScanConnector, KtxScanContext } from '../scan/types.js';
|
||||
import { deriveFederatedConnection, FEDERATED_CONNECTION_ID } from './federation.js';
|
||||
import type { KtxSqlQueryExecutionInput, KtxSqlQueryExecutionResult } from './query-executor.js';
|
||||
|
||||
/** Inputs and collaborators for executeProjectReadOnlySql. */
export interface ExecuteProjectReadOnlySqlDeps {
|
||||
/** Project whose declared connections are used to derive the federated connection. */
project: KtxLocalProject;
|
||||
/** The query to run, including the target connection id. */
input: KtxSqlQueryExecutionInput;
|
||||
/** Creates the scan connector for a non-federated connection id. */
createConnector: (connectionId: string) => Promise<KtxScanConnector> | KtxScanConnector;
|
||||
/** Override for the federated executor; defaults to executeFederatedQuery. */
executeFederated?: typeof executeFederatedQuery;
|
||||
/** Run id placed on the scan context; defaults to 'sql-execution'. */
runId?: string;
|
||||
}
|
||||
|
||||
/**
 * Resolves the target connection and runs one read-only SQL query against it.
 *
 * The federated connection id is routed to the federated executor, with its
 * members derived from the project's declared connections. Every other id goes
 * through a scan connector, which is cleaned up whether or not the query
 * succeeds.
 *
 * @throws Error when federation is requested but cannot be derived, or when
 *   the connector does not support read-only SQL.
 */
export async function executeProjectReadOnlySql(
  deps: ExecuteProjectReadOnlySqlDeps,
): Promise<KtxSqlQueryExecutionResult> {
  const { project, input } = deps;
  const { connectionId } = input;

  if (connectionId === FEDERATED_CONNECTION_ID) {
    const federated = deriveFederatedConnection(project.config.connections, project.projectDir);
    if (federated === null) {
      throw new Error('Federated execution requested but fewer than 2 attach-compatible connections exist.');
    }
    const execute = deps.executeFederated ?? executeFederatedQuery;
    return execute(federated.members, input);
  }

  let connector: KtxScanConnector | null = null;
  try {
    connector = await deps.createConnector(connectionId);
    if (!connector.capabilities.readOnlySql || !connector.executeReadOnly) {
      throw new Error(
        `Connection "${input.connectionId}" driver "${connector.driver}" does not support read-only SQL execution.`,
      );
    }

    const ctx: KtxScanContext = { runId: deps.runId ?? 'sql-execution' };
    const { headers, headerTypes, rows, totalRows, rowCount } = await connector.executeReadOnly(
      { connectionId, sql: input.sql, maxRows: input.maxRows },
      ctx,
    );
    return {
      headers,
      // Only forward header types when the connector reported them.
      ...(headerTypes ? { headerTypes } : {}),
      rows,
      totalRows,
      command: 'SELECT',
      rowCount,
    };
  } finally {
    await connector?.cleanup?.();
  }
}
|
||||
|
|
@ -10,6 +10,7 @@ export interface KtxSqlQueryExecutionInput {
|
|||
|
||||
export interface KtxSqlQueryExecutionResult {
|
||||
headers: string[];
|
||||
headerTypes?: string[];
|
||||
rows: unknown[][];
|
||||
totalRows: number;
|
||||
command: string;
|
||||
|
|
|
|||
|
|
@ -1,22 +1,297 @@
|
|||
import { KtxQueryError } from '../../errors.js';
|
||||
|
||||
const MUTATING_SQL =
|
||||
/^\s*(insert|update|delete|merge|alter|drop|create|truncate|grant|revoke|copy|call|do|vacuum|analyze|refresh)\b/i;
|
||||
const READ_SQL = /^\s*(select|with)\b/i;
|
||||
|
||||
export function assertReadOnlySql(sql: string): string {
|
||||
const trimmed = sql.trim();
|
||||
if (!READ_SQL.test(trimmed) || MUTATING_SQL.test(trimmed)) {
|
||||
throw new Error('Only read-only SELECT/WITH queries can be executed locally.');
|
||||
// Removes leading whitespace, `-- ...` line comments and `/* ... */` block
// comments so that prefix checks see the first real token of the statement.
// Queries are often emitted with such a comment header, and a parser-backed
// validator that ignores comments would accept them while a raw prefix check
// would not. An unterminated block comment is left in place and returned as-is.
function stripLeadingSqlComments(sql: string): string {
  let cursor = 0;
  for (;;) {
    // charAt yields '' past the end, which never matches \s.
    while (/\s/.test(sql.charAt(cursor))) {
      cursor += 1;
    }
    if (sql.startsWith('--', cursor)) {
      const newline = sql.indexOf('\n', cursor + 2);
      if (newline === -1) {
        // The line comment runs to the end of the input: nothing is left.
        return '';
      }
      cursor = newline + 1;
    } else if (sql.startsWith('/*', cursor)) {
      const close = sql.indexOf('*/', cursor + 2);
      if (close === -1) {
        return sql.slice(cursor);
      }
      cursor = close + 2;
    } else {
      return sql.slice(cursor);
    }
  }
}
|
||||
|
||||
// Skips a single quoted token or comment that begins exactly at `index`:
// a '...' string literal, a "..." quoted identifier, a `--` line comment, or a
// `/* */` block comment. Only standard-SQL quoting is understood (a doubled
// quote character is an escape); backslash escapes and dollar quoting are not.
// Returns the position just past the token, `sql.length` when the token is
// unterminated, or `index` itself when no such token starts there.
function skipQuotedOrComment(sql: string, index: number): number {
  const opener = sql[index];
  if (opener === "'" || opener === '"') {
    let cursor = index + 1;
    for (;;) {
      const close = sql.indexOf(opener, cursor);
      if (close === -1) {
        return sql.length;
      }
      if (sql[close + 1] !== opener) {
        return close + 1;
      }
      // Doubled quote: an escaped quote character, keep scanning after it.
      cursor = close + 2;
    }
  }

  if (sql.startsWith('--', index)) {
    const newline = sql.indexOf('\n', index + 2);
    return newline === -1 ? sql.length : newline + 1;
  }

  if (sql.startsWith('/*', index)) {
    const close = sql.indexOf('*/', index + 2);
    return close === -1 ? sql.length : close + 2;
  }

  return index;
}
|
||||
|
||||
// Variant of quoted-token skipping for dialects where a backslash escapes the
// next character inside a quoted token. Returns the index after the token,
// `sql.length` when it is unterminated, or `index` when no quote starts there.
function skipQuotedWithBackslashEscapes(sql: string, index: number): number {
  const quote = sql[index];
  if (quote !== "'" && quote !== '"') {
    return index;
  }
  let i = index + 1;
  while (i < sql.length) {
    const char = sql[i];
    if (char === '\\') {
      // The escaped character can never close the token.
      i += 2;
      continue;
    }
    if (char === quote) {
      if (sql[i + 1] === quote) {
        i += 2;
        continue;
      }
      return i + 1;
    }
    i += 1;
  }
  return sql.length;
}

// Reports whether a semicolon outside quoted tokens and comments is followed by
// anything other than whitespace and comments, using `skipToken` to lex past
// quoted tokens and comments.
function hasContentAfterStatementEnd(sql: string, skipToken: (sql: string, index: number) => number): boolean {
  let index = 0;
  let sawSemicolon = false;
  while (index < sql.length) {
    const skipped = skipToken(sql, index);
    if (skipped > index) {
      index = skipped;
      continue;
    }
    const char = sql.charAt(index);
    if (char === ';') {
      sawSemicolon = true;
    } else if (sawSemicolon && !/\s/.test(char)) {
      return true;
    }
    index += 1;
  }
  return false;
}

// Backstop against statement smuggling (`select 1; drop table x`): reject any
// semicolon that is followed by real content. Semicolons inside string
// literals, quoted identifiers, and comments are fine, as are trailing
// semicolons (optionally followed by whitespace and comments).
//
// The input is scanned twice: once with standard-SQL quoting ('' and ""
// escapes only) and once treating a backslash inside a quoted token as an
// escape. Standard lexing alone would accept `select 'a\'' ; drop table x`,
// because the doubled quote keeps the literal open, whereas a backslash-escape
// dialect ends the literal before the semicolon. The statement is rejected if
// either reading finds content after a semicolon, so disagreement between the
// two readings produces a false reject rather than a false accept. Other
// dialect extensions (dollar quoting, backticks, brackets) are still not
// lexed; this check is a backstop, not the canonical read-only gate.
function assertSingleSqlStatement(sql: string): void {
  const skipWithBackslashEscapes = (text: string, index: number): number => {
    const afterQuoted = skipQuotedWithBackslashEscapes(text, index);
    // Not a quoted token: defer to the shared lexer for comments.
    return afterQuoted > index ? afterQuoted : skipQuotedOrComment(text, index);
  };
  if (
    hasContentAfterStatementEnd(sql, skipQuotedOrComment) ||
    hasContentAfterStatementEnd(sql, skipWithBackslashEscapes)
  ) {
    throw new KtxQueryError('Only one SQL statement can be executed.');
  }
}
|
||||
|
||||
/**
 * Validates that `sql` is a single read-only statement.
 *
 * Leading comments and surrounding whitespace are removed first, then the
 * statement must match `READ_SQL`, must not match `MUTATING_SQL`, and must pass
 * the single-statement check.
 *
 * @returns the statement with leading comments and surrounding whitespace removed.
 * @throws KtxQueryError when the statement is not read-only or contains more than one statement.
 */
export function assertReadOnlySql(sql: string): string {
  const candidate = stripLeadingSqlComments(sql).trim();
  const looksReadOnly = READ_SQL.test(candidate) && !MUTATING_SQL.test(candidate);
  if (!looksReadOnly) {
    throw new KtxQueryError('Only read-only SELECT/WITH queries can be executed locally.');
  }
  assertSingleSqlStatement(candidate);
  return candidate;
}
|
||||
|
||||
/** True when `char` is defined and contains an ASCII letter, digit, `_`, or `$`. */
function isSqlIdentifierPart(char: string | undefined): boolean {
  if (char === undefined) {
    return false;
  }
  return /[A-Za-z0-9_$]/.test(char);
}
|
||||
|
||||
/**
 * Case-insensitive test for `keyword` starting at `index`, requiring that the
 * characters immediately before and after it are not identifier characters
 * (so `with` does not match inside `withdrawals`).
 */
function keywordAt(sql: string, index: number, keyword: string): boolean {
  const end = index + keyword.length;
  const candidate = sql.slice(index, end);
  if (candidate.toLowerCase() !== keyword.toLowerCase()) {
    return false;
  }
  const boundedLeft = !isSqlIdentifierPart(sql[index - 1]);
  const boundedRight = !isSqlIdentifierPart(sql[end]);
  return boundedLeft && boundedRight;
}
|
||||
|
||||
/**
 * Returns the first position at or after `index` that is neither whitespace
 * nor inside a `--` / `/* *\/` comment (or `sql.length` when none remains).
 */
function skipWhitespaceAndComments(sql: string, index: number): number {
  let position = index;
  for (;;) {
    if (position >= sql.length) {
      return position;
    }
    while (/\s/.test(sql[position] ?? '')) {
      position += 1;
    }
    const atComment = sql.startsWith('--', position) || sql.startsWith('/*', position);
    if (!atComment) {
      return position;
    }
    position = skipQuotedOrComment(sql, position);
  }
}
|
||||
|
||||
/**
 * Skips a `[bracketed]` identifier whose opening bracket is at `index`.
 * A doubled `]]` is treated as an escaped bracket.
 *
 * @returns the position after the closing `]`, or -1 when it is never closed.
 */
function skipBracketIdentifier(sql: string, index: number): number {
  let searchFrom = index + 1;
  for (;;) {
    const close = sql.indexOf(']', searchFrom);
    if (close === -1) {
      return -1;
    }
    if (sql[close + 1] !== ']') {
      return close + 1;
    }
    searchFrom = close + 2;
  }
}
|
||||
|
||||
/**
 * Skips a backtick-quoted identifier whose opening backtick is at `index`.
 * A doubled backtick is treated as an escaped backtick.
 *
 * @returns the position after the closing backtick, or -1 when it is never closed.
 */
function skipBacktickIdentifier(sql: string, index: number): number {
  let searchFrom = index + 1;
  for (;;) {
    const close = sql.indexOf('`', searchFrom);
    if (close === -1) {
      return -1;
    }
    if (sql[close + 1] !== '`') {
      return close + 1;
    }
    searchFrom = close + 2;
  }
}
|
||||
|
||||
/**
 * Skips one identifier starting at `index`: double-quoted, `[bracketed]`,
 * backtick-quoted, or a bare run of identifier characters.
 *
 * @returns the position after the identifier, or -1 when none starts at `index`
 *   (or a bracket/backtick identifier is unterminated).
 */
function skipIdentifier(sql: string, index: number): number {
  switch (sql[index]) {
    case '"': {
      const afterQuoted = skipQuotedOrComment(sql, index);
      return afterQuoted > index ? afterQuoted : -1;
    }
    case '[':
      return skipBracketIdentifier(sql, index);
    case '`':
      return skipBacktickIdentifier(sql, index);
    default: {
      let end = index;
      while (isSqlIdentifierPart(sql[end])) {
        end += 1;
      }
      return end > index ? end : -1;
    }
  }
}
|
||||
|
||||
/**
 * Skips a parenthesised group whose `(` is at `index`, honouring nesting and
 * ignoring parentheses inside string literals, quoted identifiers, and comments.
 *
 * @returns the position after the matching `)`, or -1 when `index` is not at a
 *   `(` or the group is never closed.
 */
function skipBalancedParentheses(sql: string, index: number): number {
  if (sql[index] !== '(') {
    return -1;
  }

  let openCount = 0;
  let position = index;
  while (position < sql.length) {
    const afterToken = skipQuotedOrComment(sql, position);
    if (afterToken > position) {
      position = afterToken;
      continue;
    }

    const char = sql[position];
    position += 1;
    if (char === '(') {
      openCount += 1;
    } else if (char === ')') {
      openCount -= 1;
      if (openCount === 0) {
        // `position` already points past the closing parenthesis.
        return position;
      }
    }
  }

  return -1;
}
|
||||
|
||||
/**
 * Splits a statement that starts with a `WITH [RECURSIVE]` clause into the CTE
 * prefix and the statement that follows it.
 *
 * Each CTE is parsed as `name [(columns)] AS (query)`, separated by commas.
 * Whenever the input does not follow that shape, nothing is hoisted and
 * `withPrefix` is empty.
 *
 * @returns `withPrefix` (the CTE clause followed by one space, or `''`) and
 *   `body` (the remaining statement; the input itself when it does not start with `WITH`).
 * @internal
 */
export function hoistLeadingCte(sql: string): { withPrefix: string; body: string } {
  const text = sql.trim();
  if (!keywordAt(text, 0, 'with')) {
    return { withPrefix: '', body: sql };
  }

  const notHoisted = { withPrefix: '', body: text };

  let position = skipWhitespaceAndComments(text, 'with'.length);
  if (keywordAt(text, position, 'recursive')) {
    position = skipWhitespaceAndComments(text, position + 'recursive'.length);
  }

  while (position < text.length) {
    // CTE name.
    position = skipIdentifier(text, position);
    if (position < 0) {
      return notHoisted;
    }
    position = skipWhitespaceAndComments(text, position);

    // Optional column list.
    if (text[position] === '(') {
      position = skipBalancedParentheses(text, position);
      if (position < 0) {
        return notHoisted;
      }
      position = skipWhitespaceAndComments(text, position);
    }

    // `AS (query)`.
    if (!keywordAt(text, position, 'as')) {
      return notHoisted;
    }
    position = skipWhitespaceAndComments(text, position + 'as'.length);
    position = skipBalancedParentheses(text, position);
    if (position < 0) {
      return notHoisted;
    }
    position = skipWhitespaceAndComments(text, position);

    // A comma introduces another CTE; anything else starts the main statement.
    if (text[position] === ',') {
      position = skipWhitespaceAndComments(text, position + 1);
      continue;
    }

    const body = text.slice(position).trimStart();
    if (!body) {
      return notHoisted;
    }
    const clause = text.slice(0, position).trimEnd();
    return { withPrefix: `${clause} `, body };
  }

  return notHoisted;
}
|
||||
|
||||
// Cuts a statement back to the end of its last meaningful token so it can be
// embedded as a subquery: trailing semicolons, comments, and whitespace are
// dropped (a trailing `;` would be illegal inside the wrapper, and a trailing
// line comment would comment out whatever is appended after it). The input is
// lexed forward with the standard-SQL rules of `skipQuotedOrComment`, so a `;`
// or `--` inside a string literal or quoted identifier counts as meaningful
// content rather than as a terminator.
export function stripTrailingSqlNoise(sql: string): string {
  let keepUntil = 0;
  let position = 0;
  while (position < sql.length) {
    const afterToken = skipQuotedOrComment(sql, position);
    if (afterToken > position) {
      // Comments never extend the kept range; quoted tokens always do.
      const isComment = sql.startsWith('--', position) || sql.startsWith('/*', position);
      if (!isComment) {
        keepUntil = afterToken;
      }
      position = afterToken;
      continue;
    }
    const char = sql[position] ?? '';
    const isNoise = char === ';' || /\s/.test(char);
    if (!isNoise) {
      keepUntil = position + 1;
    }
    position += 1;
  }
  return sql.slice(0, keepUntil);
}
|
||||
|
||||
/**
 * Validates `sql` as a single read-only statement and, when `maxRows` is given,
 * wraps it in a row-limiting subquery.
 *
 * Trailing semicolons, comments, and whitespace are stripped first so the
 * statement can sit inside the wrapper. A leading `WITH` clause is hoisted in
 * front of the wrapper instead of being nested inside the subquery.
 *
 * @param sql - the statement to validate and limit.
 * @param maxRows - row cap; a falsy value (`undefined`, `0`) returns the statement unwrapped.
 * @returns the SQL to execute.
 * @throws KtxQueryError when the statement is not read-only / single-statement,
 *   or when `maxRows` is not a positive integer.
 */
export function limitSqlForExecution(sql: string, maxRows: number | undefined): string {
  const trimmed = stripTrailingSqlNoise(assertReadOnlySql(sql));
  if (!maxRows) {
    return trimmed;
  }
  if (!Number.isInteger(maxRows) || maxRows <= 0) {
    throw new KtxQueryError('maxRows must be a positive integer.');
  }
  const { withPrefix, body } = hoistLeadingCte(trimmed);
  return `${withPrefix}select * from (${body}) as ktx_query_result limit ${maxRows}`;
}
|
||||
|
|
|
|||
50
packages/cli/src/context/connections/resolve-connection.ts
Normal file
50
packages/cli/src/context/connections/resolve-connection.ts
Normal file
|
|
@ -0,0 +1,50 @@
|
|||
import { KtxExpectedError } from '../../errors.js';
|
||||
import type { KtxProjectConfig, KtxProjectConnectionConfig } from '../project/config.js';
|
||||
|
||||
/** Returns the keys of `config.connections`, sorted with the default string ordering. */
function configuredConnectionIds(config: KtxProjectConfig): string[] {
  const ids = Object.keys(config.connections);
  ids.sort();
  return ids;
}
|
||||
|
||||
/** Builds the sentence appended to connection errors, listing the configured connection ids. */
function availableConnectionsHint(config: KtxProjectConfig): string {
  const ids = configuredConnectionIds(config);
  if (ids.length === 0) {
    return 'No connections are configured in ktx.yaml.';
  }
  return `Configured connections: ${ids.join(', ')}.`;
}
|
||||
|
||||
/**
 * Look up a connection by id, throwing an expected (caller-driven) error that
 * names the configured connections so an agent or CLI user can self-correct.
 *
 * Only own properties of `config.connections` count as configured, so ids that
 * collide with inherited object members (`constructor`, `toString`, …) are
 * reported as unknown instead of resolving to a prototype member.
 *
 * @param config - the loaded project configuration.
 * @param connectionId - the id to look up in `config.connections`.
 * @returns the configuration stored under `connectionId`.
 * @throws KtxExpectedError when `connectionId` is not configured.
 */
export function resolveConfiguredConnection(
  config: KtxProjectConfig,
  connectionId: string,
): KtxProjectConnectionConfig {
  const isConfigured = Object.prototype.hasOwnProperty.call(config.connections, connectionId);
  const connection = isConfigured ? config.connections[connectionId] : undefined;
  if (!connection) {
    throw new KtxExpectedError(
      `Connection "${connectionId}" is not configured in ktx.yaml. ${availableConnectionsHint(config)}`,
    );
  }
  return connection;
}
|
||||
|
||||
/**
 * Decide which connection id to run against.
 *
 * An explicitly requested id is validated against the configured connections
 * and returned unchanged. With no requested id, the sole configured connection
 * is used; zero or several configured connections is an error.
 *
 * @throws KtxExpectedError listing the configured connections when the
 *   requested id is unknown or no unambiguous default exists.
 */
export function resolveRequiredConnectionId(
  config: KtxProjectConfig,
  requested: string | undefined,
): string {
  if (requested === undefined) {
    const ids = configuredConnectionIds(config);
    if (ids.length !== 1) {
      throw new KtxExpectedError(`connectionId is required. ${availableConnectionsHint(config)}`);
    }
    return ids[0];
  }
  // Called for its validation side effect only; it throws on an unknown id.
  resolveConfiguredConnection(config, requested);
  return requested;
}
|
||||
|
|
@ -1,92 +0,0 @@
|
|||
import { isAbsolute, resolve } from 'node:path';
|
||||
import { fileURLToPath } from 'node:url';
|
||||
import Database from 'better-sqlite3';
|
||||
import { readFileSync } from 'node:fs';
|
||||
import { homedir } from 'node:os';
|
||||
import type {
|
||||
KtxSqlQueryExecutionInput,
|
||||
KtxSqlQueryExecutionResult,
|
||||
KtxSqlQueryExecutorPort,
|
||||
} from './query-executor.js';
|
||||
import { normalizeQueryRows } from './query-executor.js';
|
||||
import { limitSqlForExecution } from './read-only-sql.js';
|
||||
|
||||
type SqliteConnectionConfig = Record<string, unknown> | undefined;
|
||||
|
||||
/** Returns the connection's `driver` as a lower-cased string (`''` when absent). */
function connectionDriver(input: KtxSqlQueryExecutionInput): string {
  const driver = input.connection?.driver ?? '';
  return String(driver).toLowerCase();
}
|
||||
|
||||
/**
 * Reads `connection[key]` when it is a non-blank string and resolves any
 * `env:` / `file:` reference it contains; returns `undefined` otherwise.
 */
function stringConfigValue(connection: SqliteConnectionConfig, key: string): string | undefined {
  const raw = connection?.[key];
  if (typeof raw !== 'string') {
    return undefined;
  }
  const trimmed = raw.trim();
  return trimmed.length > 0 ? resolveStringReference(key, trimmed) : undefined;
}
|
||||
|
||||
/**
 * Resolves an indirect configuration value.
 *
 * - `env:NAME` → the value of environment variable `NAME` (`''` when unset).
 * - `file:PATH` (for any key except `url`) → the trimmed UTF-8 contents of
 *   `PATH`; a leading `~` is expanded to the current user's home directory.
 * - anything else → returned unchanged.
 *
 * @param key - the configuration key the value belongs to; `url` values keep
 *   their `file:` prefix because it is part of the URL itself.
 * @param value - the raw configured value.
 * @throws whatever `readFileSync` throws when a referenced file cannot be read.
 */
function resolveStringReference(key: string, value: string): string {
  if (value.startsWith('env:')) {
    return process.env[value.slice('env:'.length)] ?? '';
  }
  if (key !== 'url' && value.startsWith('file:')) {
    const rawPath = value.slice('file:'.length);
    // Drop the separator(s) after `~` before resolving: `resolve(home, '/x')`
    // treats '/x' as absolute and would discard the home directory entirely.
    const path = rawPath.startsWith('~')
      ? resolve(homedir(), rawPath.slice(1).replace(/^[\\/]+/, ''))
      : rawPath;
    return readFileSync(path, 'utf-8').trim();
  }
  return value;
}
|
||||
|
||||
/**
 * Extracts a filesystem path from a SQLite connection URL.
 *
 * `file:` URLs are converted with `fileURLToPath`; `sqlite:` URLs yield their
 * percent-decoded pathname when it is non-empty. Any other input, and a
 * `sqlite:` URL with an empty pathname, is returned unchanged.
 */
function sqlitePathFromUrl(url: string): string {
  if (url.startsWith('file:')) {
    return fileURLToPath(url);
  }
  if (!url.startsWith('sqlite:')) {
    return url;
  }
  const { pathname } = new URL(url);
  return pathname.length > 0 ? decodeURIComponent(pathname) : url;
}
|
||||
|
||||
/**
 * Resolves the absolute path of the SQLite database file for a connection.
 *
 * `path` wins over `url` when both resolve to a non-empty value. A `path` that
 * resolves to an empty string (for example an `env:` reference to an unset
 * variable) is treated as absent, so a valid `url` is still honoured. Relative
 * results are resolved against `input.projectDir`, falling back to the current
 * working directory.
 *
 * @throws Error when the connection's driver is not `sqlite`, or when neither
 *   `path` nor `url` yields a usable value.
 * @internal
 */
export function sqliteDatabasePathFromConnection(input: KtxSqlQueryExecutionInput): string {
  const driver = connectionDriver(input);
  if (driver !== 'sqlite') {
    throw new Error(`Local SQLite execution cannot run driver "${input.connection?.driver ?? 'unknown'}".`);
  }

  const pathValue = stringConfigValue(input.connection, 'path');
  const urlValue = stringConfigValue(input.connection, 'url');

  // Truthiness, not nullishness: both values may legitimately come back as ''.
  let candidate: string | undefined;
  if (pathValue) {
    candidate = pathValue;
  } else if (urlValue) {
    candidate = sqlitePathFromUrl(urlValue);
  }
  if (candidate === undefined) {
    throw new Error(
      `Local SQLite execution requires connections.${input.connectionId}.path or connections.${input.connectionId}.url.`,
    );
  }

  return isAbsolute(candidate) ? candidate : resolve(input.projectDir ?? process.cwd(), candidate);
}
|
||||
|
||||
/**
 * Creates a query executor backed by a local SQLite file.
 *
 * Each `execute` call validates and row-limits the SQL, resolves the database
 * path from the connection, opens the file read-only (it must already exist),
 * runs the statement, and closes the database again even when the query fails.
 */
export function createSqliteQueryExecutor(): KtxSqlQueryExecutorPort {
  const execute = async (input: KtxSqlQueryExecutionInput): Promise<KtxSqlQueryExecutionResult> => {
    // SQL validation runs before the path is resolved and the database is opened.
    const sql = limitSqlForExecution(input.sql, input.maxRows);
    const dbPath = sqliteDatabasePathFromConnection(input);
    const db = new Database(dbPath, { readonly: true, fileMustExist: true });
    try {
      const statement = db.prepare(sql);
      const rows = statement.all() as unknown[];
      const headers = statement.columns().map((column) => column.name);
      return {
        headers,
        rows: normalizeQueryRows(rows),
        totalRows: rows.length,
        command: 'SELECT',
        rowCount: rows.length,
      };
    } finally {
      db.close();
    }
  };
  return { execute };
}
|
||||
39
packages/cli/src/context/core/abort.ts
Normal file
39
packages/cli/src/context/core/abort.ts
Normal file
|
|
@ -0,0 +1,39 @@
|
|||
/**
 * Builds a `DOMException` named `AbortError` carrying `message`.
 *
 * @internal
 */
export function createAbortError(message = 'Aborted'): DOMException {
  const abortError = new DOMException(message, 'AbortError');
  return abortError;
}
|
||||
|
||||
/**
 * Reports whether `error` represents an abort: a `DOMException` named
 * `AbortError`, or any object whose `name` is `'AbortError'` or whose `code`
 * is `'ABORT_ERR'`.
 */
export function isAbortError(error: unknown): boolean {
  const isAbortDomException = error instanceof DOMException && error.name === 'AbortError';
  if (isAbortDomException) {
    return true;
  }
  if (typeof error !== 'object' || !error) {
    return false;
  }
  const candidate = error as { name?: unknown; code?: unknown };
  if (candidate.name === 'AbortError') {
    return true;
  }
  return candidate.code === 'ABORT_ERR';
}
|
||||
|
||||
/**
 * Throws an abort error when `signal` is present and already aborted;
 * otherwise does nothing.
 *
 * @internal
 */
export function throwIfAborted(signal?: AbortSignal): void {
  const alreadyAborted = signal?.aborted;
  if (alreadyAborted) {
    throw createAbortError();
  }
}
|
||||
|
||||
/**
 * Creates a fresh `AbortController` that is aborted whenever `parent` aborts.
 *
 * @param parent - optional upstream signal; when omitted the controller is independent.
 * @returns the linked controller plus a `dispose` callback that removes the
 *   listener registered on `parent` (a no-op when no listener was registered).
 */
export function linkAbortSignal(parent?: AbortSignal): { controller: AbortController; dispose: () => void } {
  const controller = new AbortController();
  const noop = (): void => undefined;
  if (!parent) {
    return { controller, dispose: noop };
  }

  const forwardAbort = (): void => {
    controller.abort(createAbortError());
  };

  if (parent.aborted) {
    // The parent aborted before linking: mirror that state immediately.
    forwardAbort();
    return { controller, dispose: noop };
  }

  parent.addEventListener('abort', forwardAbort, { once: true });
  const dispose = (): void => {
    parent.removeEventListener('abort', forwardAbort);
  };
  return { controller, dispose };
}
|
||||
|
|
@ -24,6 +24,25 @@ function sanitizedGitEnv(env: NodeJS.ProcessEnv = process.env): NodeJS.ProcessEn
|
|||
return sanitized;
|
||||
}
|
||||
|
||||
/**
 * Create a simple-git client scoped to `baseDir`. When an identity is provided, ktx's own
 * commits carry it through the GIT_AUTHOR and GIT_COMMITTER environment variables instead of
 * relying on repo-local or global git config. This keeps commits working when the project
 * directory is an existing repo ktx did not create and the machine has no configured git
 * identity (e.g. a fresh Mac with no ~/.gitconfig), without mutating the user's repo config.
 * Explicit `--author` flags on individual commits still take precedence over GIT_AUTHOR_NAME.
 *
 * `commit.gpgsign=false` is injected as a per-invocation `-c` override so ktx's commits never
 * attempt GPG signing: ktx commits under a synthetic identity that can never own a secret key, so
 * a user's `commit.gpgsign=true` would otherwise fail every commit with "No secret key".
 *
 * @param baseDir - directory the git client operates in.
 * @param identity - optional author/committer identity; applied only when both
 *   `name` and `email` are non-empty.
 */
export function createSimpleGit(baseDir: string, identity?: { name: string; email: string }): SimpleGit {
  const env = sanitizedGitEnv();
  if (identity?.name && identity.email) {
    env.GIT_AUTHOR_NAME = identity.name;
    env.GIT_AUTHOR_EMAIL = identity.email;
    env.GIT_COMMITTER_NAME = identity.name;
    env.GIT_COMMITTER_EMAIL = identity.email;
  }
  return simpleGit({ baseDir, config: ['commit.gpgsign=false'], unsafe: { allowUnsafeAskPass: true } }).env(env);
}
|
||||
|
|
|
|||
|
|
@ -27,6 +27,58 @@ export interface WorktreeEntry {
|
|||
head: string | null;
|
||||
}
|
||||
|
||||
export type KtxRepoOwnership = 'unowned' | 'ktx-managed' | 'foreign';
|
||||
|
||||
/**
 * Raised when `configDir` already holds a git repository that ktx did not
 * create. The message tells the user to run ktx in a dedicated directory or
 * move the existing repository aside.
 */
export class KtxForeignGitRepositoryError extends Error {
  constructor(configDir: string) {
    super(
      [
        `${configDir} is already a git repository that ktx did not create. `,
        'ktx maintains its context in a repository it owns; run ktx in a dedicated directory or move the existing repository aside.',
      ].join(''),
    );
    this.name = 'KtxForeignGitRepositoryError';
  }
}
|
||||
|
||||
/** Type guard: an `Error` instance that carries a `code` property. */
function isNodeErrnoException(error: unknown): error is NodeJS.ErrnoException {
  if (!(error instanceof Error)) {
    return false;
  }
  return 'code' in error;
}
|
||||
|
||||
/**
 * Decide whether ktx may own a git repository rooted exactly at `dir`. The
 * presence of a root `ktx.yaml` in the working tree is the ownership signal.
 *
 * - `unowned`: `<dir>/.git` does not exist, or `<dir>` is not a directory
 *   (lstat fails with ENOENT / ENOTDIR) → ktx may `git init`.
 * - `ktx-managed`: `<dir>/.git` is a directory and `<dir>/ktx.yaml` is a file.
 * - `foreign`: every other repository — `.git` is not a directory (e.g. the
 *   `.git` file of a linked worktree), or `ktx.yaml` is missing or unreadable.
 *
 * Only `<dir>` itself is inspected; parent directories are never consulted.
 *
 * @throws any lstat error on `<dir>/.git` other than ENOENT / ENOTDIR.
 */
export async function classifyKtxRepoOwnership(dir: string): Promise<KtxRepoOwnership> {
  let gitEntryIsDirectory: boolean;
  try {
    const gitEntry = await fs.lstat(join(dir, '.git'));
    gitEntryIsDirectory = gitEntry.isDirectory();
  } catch (error) {
    // ENOENT: no `.git` entry. ENOTDIR: `<dir>` is itself a file. Neither can be a repo.
    const nothingThere = isNodeErrnoException(error) && (error.code === 'ENOENT' || error.code === 'ENOTDIR');
    if (!nothingThere) {
      throw error;
    }
    return 'unowned';
  }

  if (!gitEntryIsDirectory) {
    return 'foreign';
  }

  let hasRootConfig: boolean;
  try {
    // stat rather than lstat, so a symlinked ktx.yaml is followed to its target.
    const configEntry = await fs.stat(join(dir, 'ktx.yaml'));
    hasRootConfig = configEntry.isFile();
  } catch {
    hasRootConfig = false;
  }
  return hasRootConfig ? 'ktx-managed' : 'foreign';
}
|
||||
|
||||
export type SquashMergeResult =
|
||||
| { ok: true; squashSha: string; touchedPaths: string[] }
|
||||
| { ok: false; conflict: true; conflictPaths: string[] };
|
||||
|
|
@ -85,8 +137,12 @@ export class GitService {
|
|||
await fs.mkdir(this.configDir, { recursive: true });
|
||||
this.logger.log(`Config directory ensured at: ${this.configDir}`);
|
||||
|
||||
// Initialize simple-git
|
||||
this.git = createSimpleGit(this.configDir);
|
||||
// Initialize simple-git. Carry ktx's identity in the environment so commits succeed even
|
||||
// when this repo already exists and the machine has no configured git identity.
|
||||
this.git = createSimpleGit(this.configDir, {
|
||||
name: this.config.git.userName,
|
||||
email: this.config.git.userEmail,
|
||||
});
|
||||
|
||||
// Initialize git repository
|
||||
await this.initialize();
|
||||
|
|
@ -94,16 +150,16 @@ export class GitService {
|
|||
|
||||
private async initialize(): Promise<void> {
|
||||
try {
|
||||
// Check if already initialized
|
||||
const isRepo = await this.git.checkIsRepo();
|
||||
const ownership = await classifyKtxRepoOwnership(this.configDir);
|
||||
|
||||
if (!isRepo) {
|
||||
await this.git.init();
|
||||
const gitConfig = this.config.git;
|
||||
await this.git.addConfig('user.name', gitConfig.userName);
|
||||
await this.git.addConfig('user.email', gitConfig.userEmail);
|
||||
this.logger.log('Initialized git repository');
|
||||
if (ownership === 'foreign') {
|
||||
throw new KtxForeignGitRepositoryError(this.configDir);
|
||||
}
|
||||
if (ownership === 'unowned') {
|
||||
await this.git.init();
|
||||
this.logger.log('Initialized ktx-managed git repository');
|
||||
}
|
||||
// ownership === 'ktx-managed' → ktx's own repo; proceed with the normal re-run path.
|
||||
|
||||
// Keep any auto-maintenance triggered by writes in-process. Detached maintenance can
|
||||
// keep object-pack directories alive briefly after awaited git commands complete,
|
||||
|
|
@ -124,8 +180,17 @@ export class GitService {
|
|||
this.logger.log('Wrote bootstrap commit to config repo');
|
||||
}
|
||||
} catch (error) {
|
||||
// The foreign-repo error is already typed and actionable; surface it verbatim so every
|
||||
// command that loads the project shows the same clear guidance instead of a generic wrapper.
|
||||
if (error instanceof KtxForeignGitRepositoryError) {
|
||||
throw error;
|
||||
}
|
||||
this.logger.error('Failed to initialize git repository', error);
|
||||
throw new Error('Failed to initialize git repository');
|
||||
// Preserve the underlying git error: the generic message alone is undiagnosable in
|
||||
// telemetry and unactionable for the user. The exception reporter walks `cause` and
|
||||
// redacts secrets before send.
|
||||
const detail = error instanceof Error ? error.message : String(error);
|
||||
throw new Error(`Failed to initialize git repository: ${detail}`, { cause: error });
|
||||
}
|
||||
}
|
||||
|
||||
|
|
@ -547,12 +612,13 @@ export class GitService {
|
|||
}
|
||||
|
||||
/**
|
||||
* List all paths under the working tree that match `pathSpec`, scoped to HEAD.
|
||||
* Used for the reconciler's first-ever run when there's no watermark to diff from.
|
||||
* List all paths matching `pathSpec` as they exist at `commitHash`. Reads from
|
||||
* git object storage, so it's safe against concurrent working-tree mutations
|
||||
* and can recover paths (e.g. a human-renamed file) that no longer exist on disk.
|
||||
*/
|
||||
async listFilesAtHead(pathSpec: string): Promise<string[]> {
|
||||
async listFilesAtCommit(pathSpec: string, commitHash: string): Promise<string[]> {
|
||||
try {
|
||||
const raw = await this.git.raw(['ls-tree', '-r', '-z', '--name-only', 'HEAD', '--', pathSpec]);
|
||||
const raw = await this.git.raw(['ls-tree', '-r', '-z', '--name-only', commitHash, '--', pathSpec]);
|
||||
if (!raw) {
|
||||
return [];
|
||||
}
|
||||
|
|
@ -562,6 +628,14 @@ export class GitService {
|
|||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* List all paths under the working tree that match `pathSpec`, scoped to HEAD.
|
||||
* Used for the reconciler's first-ever run when there's no watermark to diff from.
|
||||
*/
|
||||
async listFilesAtHead(pathSpec: string): Promise<string[]> {
|
||||
return this.listFilesAtCommit(pathSpec, 'HEAD');
|
||||
}
|
||||
|
||||
/**
|
||||
* Collapse all commits between `preHead` and current HEAD into a single commit with the given
|
||||
* message. Used by the memory agent to squash N per-tool-call commits into one ingest commit.
|
||||
|
|
@ -899,7 +973,10 @@ export class GitService {
|
|||
*/
|
||||
forWorktree(workdir: string): GitService {
|
||||
const scoped = new GitService(this.config, this.logger);
|
||||
scoped.git = createSimpleGit(workdir);
|
||||
scoped.git = createSimpleGit(workdir, {
|
||||
name: this.config.git.userName,
|
||||
email: this.config.git.userEmail,
|
||||
});
|
||||
scoped.configDir = workdir;
|
||||
return scoped;
|
||||
}
|
||||
|
|
|
|||
85
packages/cli/src/context/ingest/adapters/gdrive/chunk.ts
Normal file
85
packages/cli/src/context/ingest/adapters/gdrive/chunk.ts
Normal file
|
|
@ -0,0 +1,85 @@
|
|||
import { createHash } from 'node:crypto';
|
||||
import { readdir, readFile } from 'node:fs/promises';
|
||||
import { join, relative } from 'node:path';
|
||||
import type { ChunkResult, DiffSet, ScopeDescriptor, WorkUnit } from '../../types.js';
|
||||
import { gdriveManifestSchema, gdriveMetadataSchema } from './types.js';
|
||||
|
||||
const GDRIVE_RECONCILE_GUIDANCE =
|
||||
'Synthesize durable wiki knowledge from this Google Doc. Preserve product definitions, process documentation, and operating rules as wiki pages. Do not create semantic-layer sources from gdrive content in v1.';
|
||||
|
||||
/** Converts every backslash in `path` to a forward slash. */
function normalizeRawPath(path: string): string {
  return path.split('\\').join('/');
}
|
||||
|
||||
/**
 * Recursively lists every regular file under `root`, returned as sorted,
 * forward-slash paths relative to `root`.
 */
async function walk(root: string): Promise<string[]> {
  const entries = await readdir(root, { withFileTypes: true, recursive: true });
  const files: string[] = [];
  for (const entry of entries) {
    if (!entry.isFile()) {
      continue;
    }
    const absolutePath = join(entry.parentPath, entry.name);
    files.push(normalizeRawPath(relative(root, absolutePath)));
  }
  return files.sort();
}
|
||||
|
||||
/**
 * Derives a work-unit key from a staged page path: strips a leading `docs/`
 * and a trailing `/page.md`, collapses each run of non-alphanumeric characters
 * into a single `-`, trims leading/trailing dashes, and prefixes `gdrive-`.
 */
function safeUnitKey(path: string): string {
  const withoutPrefix = path.replace(/^docs\//, '');
  const withoutPageFile = withoutPrefix.replace(/\/page\.md$/, '');
  const dashed = withoutPageFile.replace(/[^a-zA-Z0-9]+/g, '-');
  const trimmed = dashed.replace(/^-+|-+$/g, '');
  return `gdrive-${trimmed}`;
}
|
||||
|
||||
/**
 * Loads `<stagedDir>/manifest.json` and validates it with `gdriveManifestSchema`.
 *
 * @throws Error prefixed with "Invalid gdrive manifest:" when the file cannot
 *   be read, is not valid JSON, or fails schema validation.
 */
async function readManifest(stagedDir: string) {
  try {
    const raw = await readFile(join(stagedDir, 'manifest.json'), 'utf-8');
    return gdriveManifestSchema.parse(JSON.parse(raw));
  } catch (error) {
    const detail = error instanceof Error ? error.message : String(error);
    throw new Error(`Invalid gdrive manifest: ${detail}`);
  }
}
|
||||
|
||||
/**
 * Splits a staged Google Drive snapshot into one work unit per `page.md`.
 *
 * Without a `diffSet` every page becomes a unit made of its `metadata.json`
 * and `page.md`. With a `diffSet`, only pages whose metadata or page file was
 * added or modified are emitted, and each unit lists only the touched files.
 * `manifest.json` is recorded as a dependency of every unit, and all remaining
 * staged files form the unit's peer index.
 *
 * @param stagedDir - directory holding `manifest.json` and the staged documents.
 * @param diffSet - optional change set; its deleted paths are reported as evictions.
 * @returns the work units, optional eviction list, reconcile notes, and a
 *   context report carrying the manifest's warnings.
 */
export async function chunkGdriveStagedDir(stagedDir: string, diffSet?: DiffSet): Promise<ChunkResult> {
  const files = await walk(stagedDir);
  const manifest = await readManifest(stagedDir);
  const touched = diffSet
    ? new Set([...diffSet.added, ...diffSet.modified].map((path) => normalizeRawPath(path)))
    : null;
  const workUnits: WorkUnit[] = [];

  const pagePaths = files.filter((path) => path.endsWith('/page.md'));
  for (const pagePath of pagePaths) {
    const metadataPath = pagePath.replace(/\/page\.md$/, '/metadata.json');
    const primary = [metadataPath, pagePath].filter((path) => files.includes(path));
    const isTouched = !touched || primary.some((path) => touched.has(path));
    if (!isTouched) {
      continue;
    }

    const rawMetadata = await readFile(join(stagedDir, metadataPath), 'utf-8');
    const metadata = gdriveMetadataSchema.parse(JSON.parse(rawMetadata));

    const rawFiles = (touched ? primary.filter((path) => touched.has(path)) : primary).sort();
    const dependencyPaths = ['manifest.json'].filter((path) => !rawFiles.includes(path));
    const excluded = new Set([...rawFiles, ...dependencyPaths]);
    const peerFileIndex = files.filter((path) => !excluded.has(path)).sort();

    workUnits.push({
      unitKey: safeUnitKey(pagePath),
      displayLabel: metadata.path,
      rawFiles,
      dependencyPaths,
      peerFileIndex,
      notes: GDRIVE_RECONCILE_GUIDANCE,
    });
  }

  let eviction: ChunkResult['eviction'];
  if (diffSet && diffSet.deleted.length > 0) {
    eviction = { deletedRawPaths: diffSet.deleted.map((path) => normalizeRawPath(path)).sort() };
  }

  return {
    workUnits,
    eviction,
    reconcileNotes: ['Google Drive docs are knowledge-only in v1; keep output in wiki pages unless later follow-up work expands scope.'],
    contextReport: { capped: false, warnings: manifest.warnings },
  };
}
|
||||
|
||||
/**
 * Describes the scope of a staged Google Drive snapshot.
 *
 * The fingerprint is the SHA-256 hex digest of the manifest's `folderId` and
 * `recursive` settings serialised as JSON. A raw path is in scope when it is
 * `manifest.json` or lives under `docs/`.
 */
export async function describeGdriveScope(stagedDir: string): Promise<ScopeDescriptor> {
  const { folderId, recursive } = await readManifest(stagedDir);
  const scopeKey = JSON.stringify({ folderId, recursive });
  const hasher = createHash('sha256');
  hasher.update(scopeKey);
  return {
    fingerprint: hasher.digest('hex'),
    isPathInScope: (rawPath) => rawPath === 'manifest.json' || rawPath.startsWith('docs/'),
  };
}
|
||||
20
packages/cli/src/context/ingest/adapters/gdrive/detect.ts
Normal file
20
packages/cli/src/context/ingest/adapters/gdrive/detect.ts
Normal file
|
|
@ -0,0 +1,20 @@
|
|||
import { readFile, readdir } from 'node:fs/promises';
|
||||
import { join } from 'node:path';
|
||||
|
||||
/**
 * Detects whether `stagedDir` holds a staged Google Drive snapshot.
 *
 * A `manifest.json` whose `source` is `'gdrive'` is conclusive. Otherwise
 * (manifest missing, unreadable, or naming another source) the directory is
 * accepted when any file named `page.md` exists anywhere beneath it. Returns
 * `false` when the directory cannot be listed.
 */
export async function detectGdriveStagedDir(stagedDir: string): Promise<boolean> {
  let declaredSource: unknown;
  try {
    const raw = await readFile(join(stagedDir, 'manifest.json'), 'utf-8');
    declaredSource = (JSON.parse(raw) as { source?: unknown }).source;
  } catch {
    // No usable manifest: rely on structural detection below.
    declaredSource = undefined;
  }
  if (declaredSource === 'gdrive') {
    return true;
  }

  try {
    const entries = await readdir(stagedDir, { withFileTypes: true, recursive: true });
    for (const entry of entries) {
      if (entry.isFile() && entry.name === 'page.md') {
        return true;
      }
    }
    return false;
  } catch {
    return false;
  }
}
|
||||
132
packages/cli/src/context/ingest/adapters/gdrive/fetch.ts
Normal file
132
packages/cli/src/context/ingest/adapters/gdrive/fetch.ts
Normal file
|
|
@ -0,0 +1,132 @@
|
|||
import { createHash } from 'node:crypto';
|
||||
import { mkdir, writeFile } from 'node:fs/promises';
|
||||
import { dirname, join } from 'node:path';
|
||||
import { createGoogleDocsClients, driveFolderChildrenQuery } from './gdrive-client.js';
|
||||
import { normalizeGoogleDocToMarkdown } from './normalize.js';
|
||||
import type { GdriveFileRecord, GdriveManifest, GdrivePullConfig } from './types.js';
|
||||
import { GDRIVE_DOC_MIME_TYPE, GDRIVE_FOLDER_MIME_TYPE, GDRIVE_SOURCE_KEY } from './types.js';
|
||||
|
||||
/**
 * Writes `value` to `path` as 2-space-indented JSON followed by a newline,
 * creating the parent directory first when needed.
 */
async function writeJson(path: string, value: unknown): Promise<void> {
  const parentDir = dirname(path);
  await mkdir(parentDir, { recursive: true });
  const serialized = JSON.stringify(value, null, 2);
  await writeFile(path, `${serialized}\n`, 'utf-8');
}
|
||||
|
||||
/**
 * Writes `value` to `path` as UTF-8, appending a newline when the text does not
 * already end with one, and creating the parent directory first when needed.
 */
async function writeText(path: string, value: string): Promise<void> {
  const parentDir = dirname(path);
  await mkdir(parentDir, { recursive: true });
  let contents = value;
  if (!contents.endsWith('\n')) {
    contents = `${contents}\n`;
  }
  await writeFile(path, contents, 'utf-8');
}
|
||||
|
||||
/**
 * Turns arbitrary text into a lower-case ASCII slug: the text is NFKD
 * normalised, non-ASCII characters are dropped, each run of non-alphanumeric
 * characters becomes one `-`, and leading/trailing dashes are trimmed.
 * Returns `'untitled'` when nothing is left.
 */
function slugifySegment(value: string): string {
  const decomposed = value.normalize('NFKD');
  const asciiOnly = decomposed.replace(/[^\x00-\x7F]/g, '');
  const dashed = asciiOnly.replace(/[^a-zA-Z0-9]+/g, '-');
  const trimmed = dashed.replace(/^-+|-+$/g, '');
  const slug = trimmed.toLowerCase();
  if (slug === '') {
    return 'untitled';
  }
  return slug;
}
|
||||
|
||||
/**
 * Slugifies `value` and caps the slug at `maxLength` characters. A clipped slug
 * has its trailing dashes removed and falls back to `'untitled'` when empty.
 */
function compactSegment(value: string, maxLength = 24): string {
  const slug = slugifySegment(value);
  if (slug.length <= maxLength) {
    return slug;
  }
  const clipped = slug.slice(0, maxLength).replace(/-+$/g, '');
  return clipped || 'untitled';
}
|
||||
|
||||
/** Returns the first `length` hex characters of the SHA-1 digest of `value`. */
function shortHash(value: string, length = 10): string {
  const hasher = createHash('sha1');
  hasher.update(value);
  const hex = hasher.digest('hex');
  return hex.slice(0, length);
}
|
||||
|
||||
/**
 * Builds a staged directory name for a document: the compacted title slug,
 * a dash, and a short hash of the Drive file id.
 */
function gdriveDocDirName(title: string, fileId: string): string {
  const titlePart = compactSegment(title);
  const idPart = shortHash(fileId);
  return `${titlePart}-${idPart}`;
}
|
||||
|
||||
/** A Google Doc discovered while listing a Drive folder. */
interface GdriveDocRecord {
  /** The Drive file record returned by the folder listing. */
  file: GdriveFileRecord;
  /** Names of the sub-folders traversed from the listing root down to this document. */
  drivePath: string[];
  /** Id of the folder whose listing contained this document. */
  folderId: string;
}
|
||||
|
||||
/** A Drive file that was listed but not ingested. */
interface GdriveSkippedFile {
  /** Drive id of the skipped file. */
  externalId: string;
  /** Human-readable explanation of why the file was skipped. */
  reason: string;
}
|
||||
|
||||
/** Outcome of walking a Drive folder. */
interface ListFolderResult {
  /** Google Docs found, in listing order. */
  docs: GdriveDocRecord[];
  /** Files that were neither folders nor Google Docs. */
  skipped: GdriveSkippedFile[];
}
|
||||
|
||||
/**
 * Walks every page of a Drive folder listing and sorts its children into Google Docs and
 * skipped files.
 *
 * @param drive Drive client used for listing.
 * @param folderId Folder whose direct children are listed.
 * @param recursive When true, sub-folders are walked depth-first as they are encountered;
 *   when false they are ignored.
 * @param parents Names of the folders walked so far (from the listing root); stored on each
 *   document as its `drivePath`.
 * @returns Docs and skipped files for this folder and, when recursive, everything below it.
 */
async function listFolderFiles(
  drive: ReturnType<typeof createGoogleDocsClients>['drive'],
  folderId: string,
  recursive: boolean,
  parents: string[] = [],
): Promise<ListFolderResult> {
  const result: ListFolderResult = { docs: [], skipped: [] };
  const q = driveFolderChildrenQuery(folderId);
  let pageToken: string | undefined;
  let hasMorePages = true;

  while (hasMorePages) {
    const page = await drive.listFiles({ q, pageToken });

    for (const entry of page.files) {
      switch (entry.mimeType) {
        case GDRIVE_FOLDER_MIME_TYPE: {
          if (!recursive) {
            break;
          }
          const subtree = await listFolderFiles(drive, entry.id, true, [...parents, entry.name]);
          result.docs.push(...subtree.docs);
          result.skipped.push(...subtree.skipped);
          break;
        }
        case GDRIVE_DOC_MIME_TYPE:
          result.docs.push({ file: entry, drivePath: parents, folderId });
          break;
        default:
          result.skipped.push({ externalId: entry.id, reason: `unsupported mime type: ${entry.mimeType}` });
          break;
      }
    }

    // A missing (or empty) token means the listing is exhausted.
    pageToken = page.nextPageToken ?? undefined;
    hasMorePages = Boolean(pageToken);
  }

  return result;
}
|
||||
|
||||
/**
 * Pulls every Google Doc under the configured Drive folder into `stagedDir`.
 *
 * For each document a directory `docs/<folder slugs…>/<title slug>-<id hash>/` is created
 * containing `metadata.json` and `page.md` (an H1 with the title followed by the normalized
 * body). Documents are fetched one at a time. A `manifest.json` summarizing the run is written
 * last and also returned.
 *
 * @param params.key Raw service-account key, passed to `createGoogleDocsClients`.
 * @param params.config Pull configuration (`folderId`, `recursive`).
 * @param params.stagedDir Output directory; created if missing.
 */
export async function fetchGdriveSnapshot(params: {
  key: unknown;
  config: GdrivePullConfig;
  stagedDir: string;
}): Promise<GdriveManifest> {
  const { key, config, stagedDir } = params;
  await mkdir(stagedDir, { recursive: true });

  const clients = createGoogleDocsClients(key);
  const listing = await listFolderFiles(clients.drive, config.folderId, config.recursive);

  for (const entry of listing.docs) {
    const { file, drivePath, folderId } = entry;
    const document = await clients.docs.getDocument(file.id);

    // Prefer the title from the Docs API; fall back to the Drive file name.
    const title = (document.title?.trim() || file.name).trim();
    const folderSegments = drivePath.map((segment) => compactSegment(segment));
    const relDir = join('docs', ...folderSegments, gdriveDocDirName(title, file.id));

    const markdownBody = normalizeGoogleDocToMarkdown(document);
    const heading = `# ${title}`;
    const pageMarkdown = markdownBody ? `${heading}\n\n${markdownBody}` : heading;

    const metadata = {
      id: file.id,
      title,
      path: [...drivePath, title].join(' / ') || title,
      url: file.webViewLink,
      mimeType: file.mimeType,
      folderId,
      drivePath,
      modifiedTime: file.modifiedTime,
    };
    await writeJson(join(stagedDir, relDir, 'metadata.json'), metadata);
    await writeText(join(stagedDir, relDir, 'page.md'), pageMarkdown);
  }

  const warnings: string[] = [];
  if (listing.skipped.length > 0) {
    warnings.push(`Skipped ${listing.skipped.length} non-Google-Doc file(s); only Google Docs are ingested in v1.`);
  }

  const manifest: GdriveManifest = {
    source: GDRIVE_SOURCE_KEY,
    folderId: config.folderId,
    recursive: config.recursive,
    fetchedAt: new Date().toISOString(),
    fileCount: listing.docs.length,
    skipped: listing.skipped,
    warnings,
  };
  await writeJson(join(stagedDir, 'manifest.json'), manifest);
  return manifest;
}
|
||||
188
packages/cli/src/context/ingest/adapters/gdrive/gdrive-client.ts
Normal file
188
packages/cli/src/context/ingest/adapters/gdrive/gdrive-client.ts
Normal file
|
|
@ -0,0 +1,188 @@
|
|||
import { JWT } from 'google-auth-library';
|
||||
import type { GdriveFileRecord, GdriveServiceAccountKey, GoogleDocsDocument } from './types.js';
|
||||
import { GDRIVE_DOC_MIME_TYPE, GDRIVE_FOLDER_MIME_TYPE, GDRIVE_SCOPES, gdriveServiceAccountKeySchema } from './types.js';
|
||||
|
||||
/** Base URL of the Google Drive v3 REST API. */
const GOOGLE_DRIVE_BASE_URL = 'https://www.googleapis.com/drive/v3';
/** Base URL of the Google Docs v1 REST API. */
const GOOGLE_DOCS_BASE_URL = 'https://docs.googleapis.com/v1';
/** File fields requested from Drive for both listings and single-file lookups. */
const GOOGLE_FILE_FIELDS = 'id,name,mimeType,parents,webViewLink,modifiedTime';

/** HTTP statuses that `fetchWithGoogleRetry` treats as transient. */
const RETRYABLE_STATUSES = new Set<number>([408, 429, 500, 502, 503, 504]);
/** Default total number of attempts (first try included) for one request. */
const MAX_REQUEST_ATTEMPTS = 4;
|
||||
|
||||
/**
 * Shape of a Drive file resource as parsed from the API response. Every field is optional
 * because the payload is untrusted until checked by `isGoogleApiFileRecord`.
 */
interface GoogleApiFile {
  id?: string;
  name?: string;
  mimeType?: string;
  parents?: string[];
  webViewLink?: string;
  modifiedTime?: string;
}
|
||||
|
||||
/** Shape of a Drive `files` listing response as parsed from the API. */
interface GoogleApiListResponse {
  /** Files on this page; may be absent. */
  files?: GoogleApiFile[];
  /** Token for the next page; absent on the last page. */
  nextPageToken?: string;
}
|
||||
|
||||
/** Minimal Drive operations used by the gdrive adapter. */
export interface GoogleDriveClient {
  /** Fetches one page of files matching the Drive query `q`; `nextPageToken` is null on the last page. */
  listFiles(args: { q: string; pageToken?: string }): Promise<{ files: GdriveFileRecord[]; nextPageToken: string | null }>;
  /** Looks up a single file by id; resolves to null when no usable record is available. */
  getFile(fileId: string): Promise<GdriveFileRecord | null>;
}
|
||||
|
||||
/** The pair of API clients returned by `createGoogleDocsClients`. */
export interface GoogleDocsClients {
  /** Drive client for listing and looking up files. */
  drive: GoogleDriveClient;
  /** Docs client for retrieving document content. */
  docs: {
    /** Fetches the full document resource for `documentId`. */
    getDocument(documentId: string): Promise<GoogleDocsDocument>;
  };
}
|
||||
|
||||
/** Resolves after `ms` milliseconds, using `setTimeout`. */
function defaultSleep(ms: number): Promise<void> {
  return new Promise<void>((resolve) => {
    setTimeout(resolve, ms);
  });
}
|
||||
|
||||
/**
 * Computes how long to wait before the next retry.
 *
 * A `Retry-After` header that parses as a non-negative integer number of seconds wins and is
 * capped at 30 s. Otherwise exponential backoff applies: 500 ms doubled per `attempt`
 * (0-based), capped at 8 s.
 */
function retryDelayMs(attempt: number, retryAfterHeader: string | null): number {
  if (retryAfterHeader) {
    const seconds = Number.parseInt(retryAfterHeader, 10);
    if (Number.isFinite(seconds) && seconds >= 0) {
      return Math.min(seconds * 1000, 30_000);
    }
  }
  const backoffMs = 500 * 2 ** attempt;
  return Math.min(backoffMs, 8_000);
}
|
||||
|
||||
/**
 * @internal Retries transient Google API responses (429/5xx) honoring Retry-After.
 *
 * `doFetch` is invoked once, then again for as long as the response status is in
 * `RETRYABLE_STATUSES` and fewer than `maxAttempts` calls have been made. Errors thrown by
 * `doFetch` itself are not retried and propagate to the caller.
 *
 * @param doFetch Performs one attempt; must be safe to call repeatedly.
 * @param options.maxAttempts Total attempts including the first (default `MAX_REQUEST_ATTEMPTS`).
 * @param options.sleep Delay function, injectable for tests (default `defaultSleep`).
 * @returns The first non-retryable response, or the last response once attempts run out.
 */
export async function fetchWithGoogleRetry(
  doFetch: () => Promise<Response>,
  options: { maxAttempts?: number; sleep?: (ms: number) => Promise<void> } = {},
): Promise<Response> {
  const maxAttempts = options.maxAttempts ?? MAX_REQUEST_ATTEMPTS;
  const sleep = options.sleep ?? defaultSleep;
  let response = await doFetch();
  for (let attempt = 1; attempt < maxAttempts && !response.ok && RETRYABLE_STATUSES.has(response.status); attempt += 1) {
    const delayMs = retryDelayMs(attempt - 1, response.headers.get('retry-after'));
    // This response is being abandoned: release its body so the unread stream does not keep
    // the underlying connection pinned while we wait and retry.
    try {
      await response.body?.cancel();
    } catch {
      // Best-effort cleanup; a failure to cancel must not mask the retry.
    }
    await sleep(delayMs);
    response = await doFetch();
  }
  return response;
}
|
||||
|
||||
/**
 * Decodes a Google API response as JSON.
 *
 * @throws Error carrying the status code and the response body (or the status text when the
 *   body is empty) if the response is not OK.
 * @returns The parsed JSON, cast to `T` without validation.
 */
async function parseGoogleResponse<T>(response: Response): Promise<T> {
  if (response.ok) {
    const payload: unknown = await response.json();
    return payload as T;
  }
  const body = await response.text();
  const detail = body || response.statusText;
  throw new Error(`Google API request failed (${response.status}): ${detail}`);
}
|
||||
|
||||
/**
 * GETs `url` with auth headers from the JWT client, retrying through `fetchWithGoogleRetry`.
 * Headers are requested again on every attempt.
 */
async function authorizedFetch(client: JWT, url: string): Promise<Response> {
  const attemptOnce = async (): Promise<Response> => {
    const headers = await client.getRequestHeaders(url);
    return fetch(url, { headers });
  };
  return fetchWithGoogleRetry(attemptOnce);
}
|
||||
|
||||
/** Type guard: true when `id`, `name` and `mimeType` are all present as strings. */
function isGoogleApiFileRecord(file: GoogleApiFile): file is GoogleApiFile & {
  id: string;
  name: string;
  mimeType: string;
} {
  const required = [file.id, file.name, file.mimeType];
  return required.every((value) => typeof value === 'string');
}
|
||||
|
||||
/**
 * Converts a validated Drive API file into a `GdriveFileRecord`, defaulting the optional
 * fields: `parents` becomes `[]` (non-string entries dropped), and `webViewLink` /
 * `modifiedTime` become `null` when absent or not strings.
 */
function toFileRecord(file: GoogleApiFile & { id: string; name: string; mimeType: string }): GdriveFileRecord {
  const { id, name, mimeType } = file;
  const parents = Array.isArray(file.parents)
    ? file.parents.filter((parent): parent is string => typeof parent === 'string')
    : [];
  const webViewLink = typeof file.webViewLink === 'string' ? file.webViewLink : null;
  const modifiedTime = typeof file.modifiedTime === 'string' ? file.modifiedTime : null;
  return { id, name, mimeType, parents, webViewLink, modifiedTime };
}
|
||||
|
||||
/** Backslash-escapes backslashes and single quotes so `value` can sit inside a quoted Drive query literal. */
function escapeDriveQueryValue(value: string): string {
  return value.replace(/[\\']/g, (character) => `\\${character}`);
}
|
||||
|
||||
/** Builds the Drive query for the non-trashed direct children of a folder, escaping the folder id. */
export function driveFolderChildrenQuery(folderId: string): string {
  const quotedFolderId = `'${escapeDriveQueryValue(folderId)}'`;
  return [quotedFolderId, 'in parents and trashed = false'].join(' ');
}
|
||||
|
||||
/**
 * Confirms `folderId` resolves to a folder the service account can read, then counts the
 * Google Docs directly inside it (sub-folders are not descended into).
 *
 * @throws Error when the id cannot be looked up, or when it resolves to something other
 *   than a folder.
 * @returns Number of Google Docs among the folder's direct, non-trashed children.
 */
export async function verifyGdriveFolderAndCountDocs(
  drive: GoogleDriveClient,
  folderId: string,
): Promise<number> {
  const folder = await drive.getFile(folderId);
  if (folder === null) {
    throw new Error(
      `Google Drive folder "${folderId}" is not accessible. Share it with the service account email and verify folder_id.`,
    );
  }
  if (folder.mimeType !== GDRIVE_FOLDER_MIME_TYPE) {
    throw new Error(`Google Drive id "${folderId}" is not a folder (mimeType: ${folder.mimeType}).`);
  }

  const q = driveFolderChildrenQuery(folderId);
  let docCount = 0;
  let pageToken: string | undefined;
  let hasMorePages = true;
  while (hasMorePages) {
    const page = await drive.listFiles({ q, pageToken });
    for (const file of page.files) {
      if (file.mimeType === GDRIVE_DOC_MIME_TYPE) {
        docCount += 1;
      }
    }
    pageToken = page.nextPageToken ?? undefined;
    hasMorePages = Boolean(pageToken);
  }
  return docCount;
}
|
||||
|
||||
/**
 * Builds Drive and Docs REST clients authenticated as a Google service account.
 *
 * `rawKey` is validated with `gdriveServiceAccountKeySchema` (a failed parse throws) and used
 * to create a JWT client scoped to `GDRIVE_SCOPES`. Every request goes through
 * `authorizedFetch`, and non-OK responses surface as errors from `parseGoogleResponse`,
 * except that `drive.getFile` maps a 404 to `null`.
 *
 * @param rawKey Unvalidated service-account key object.
 * @returns Clients exposing `drive.listFiles`, `drive.getFile` and `docs.getDocument`.
 */
export function createGoogleDocsClients(rawKey: unknown): GoogleDocsClients {
  const key = gdriveServiceAccountKeySchema.parse(rawKey) satisfies GdriveServiceAccountKey;
  const client = new JWT({
    email: key.client_email,
    key: key.private_key,
    scopes: [...GDRIVE_SCOPES],
  });

  return {
    drive: {
      async listFiles(args) {
        const params = new URLSearchParams({
          q: args.q,
          supportsAllDrives: 'true',
          includeItemsFromAllDrives: 'true',
          pageSize: '1000',
          fields: `nextPageToken,files(${GOOGLE_FILE_FIELDS})`,
        });
        if (args.pageToken) {
          params.set('pageToken', args.pageToken);
        }
        const response = await authorizedFetch(client, `${GOOGLE_DRIVE_BASE_URL}/files?${params.toString()}`);
        const parsed = await parseGoogleResponse<GoogleApiListResponse>(response);
        return {
          // Entries missing id/name/mimeType are dropped rather than failing the whole page.
          files: (parsed.files ?? []).filter(isGoogleApiFileRecord).map(toFileRecord),
          nextPageToken: typeof parsed.nextPageToken === 'string' ? parsed.nextPageToken : null,
        };
      },
      async getFile(fileId: string) {
        const params = new URLSearchParams({ supportsAllDrives: 'true', fields: GOOGLE_FILE_FIELDS });
        const response = await authorizedFetch(
          client,
          `${GOOGLE_DRIVE_BASE_URL}/files/${encodeURIComponent(fileId)}?${params.toString()}`,
        );
        if (response.status === 404) {
          return null;
        }
        const file = await parseGoogleResponse<GoogleApiFile>(response);
        return isGoogleApiFileRecord(file) ? toFileRecord(file) : null;
      },
    },
    docs: {
      async getDocument(documentId: string) {
        const params = new URLSearchParams({
          includeTabsContent: 'true',
          suggestionsViewMode: 'PREVIEW_WITHOUT_SUGGESTIONS',
        });
        // Encode the id like drive.getFile does, so an unusual id cannot alter the URL path or query.
        const response = await authorizedFetch(
          client,
          `${GOOGLE_DOCS_BASE_URL}/documents/${encodeURIComponent(documentId)}?${params.toString()}`,
        );
        return await parseGoogleResponse<GoogleDocsDocument>(response);
      },
    },
  };
}
|
||||
|
|
@ -0,0 +1,33 @@
|
|||
import type { ChunkResult, DiffSet, FetchContext, ScopeDescriptor, SourceAdapter } from '../../types.js';
|
||||
import { chunkGdriveStagedDir, describeGdriveScope } from './chunk.js';
|
||||
import { detectGdriveStagedDir } from './detect.js';
|
||||
import { fetchGdriveSnapshot } from './fetch.js';
|
||||
import { gdrivePullConfigSchema } from './types.js';
|
||||
|
||||
/**
 * Source adapter for Google Drive. Each method delegates to the matching gdrive module
 * function (`detect`, `fetch`, `chunk`, `describeScope`).
 */
export class GdriveSourceAdapter implements SourceAdapter {
  readonly source = 'gdrive';
  readonly skillNames = ['gdrive_synthesize'];
  readonly reconcileSkillNames: string[] = [];
  readonly evidenceIndexing = 'documents' as const;

  /** Reports whether `stagedDir` is recognized as a gdrive staged directory. */
  detect(stagedDir: string): Promise<boolean> {
    return detectGdriveStagedDir(stagedDir);
  }

  /**
   * Validates `pullConfig`, parses its `serviceAccountKey` JSON string, and writes a fresh
   * snapshot into `stagedDir`. Throws if the config fails validation or the key is not valid JSON.
   */
  async fetch(pullConfig: unknown, stagedDir: string, _ctx: FetchContext): Promise<void> {
    const config = gdrivePullConfigSchema.parse(pullConfig);
    const key: unknown = JSON.parse(config.serviceAccountKey);
    await fetchGdriveSnapshot({ key, config, stagedDir });
  }

  /** Chunks the staged snapshot, optionally restricted by `diffSet`. */
  chunk(stagedDir: string, diffSet?: DiffSet): Promise<ChunkResult> {
    return chunkGdriveStagedDir(stagedDir, diffSet);
  }

  /** Describes the scope covered by the staged snapshot. */
  describeScope(stagedDir: string): Promise<ScopeDescriptor> {
    return describeGdriveScope(stagedDir);
  }
}
|
||||
323
packages/cli/src/context/ingest/adapters/gdrive/normalize.ts
Normal file
323
packages/cli/src/context/ingest/adapters/gdrive/normalize.ts
Normal file
|
|
@ -0,0 +1,323 @@
|
|||
import type {
|
||||
GoogleDocsDocument,
|
||||
GoogleDocsDocumentStyle,
|
||||
GoogleDocsHeaderFooter,
|
||||
GoogleDocsLinkTarget,
|
||||
GoogleDocsList,
|
||||
GoogleDocsParagraph,
|
||||
GoogleDocsParagraphElement,
|
||||
GoogleDocsStructuralElement,
|
||||
GoogleDocsTab,
|
||||
GoogleDocsTable,
|
||||
GoogleDocsTableCell,
|
||||
} from './types.js';
|
||||
|
||||
/** Backslash-escapes the Markdown emphasis/code characters `*`, `_`, `~` and `` ` ``. */
function escapeMarkdownText(value: string): string {
  return value.replace(/([*_~`])/g, (_match, special: string) => `\\${special}`);
}
|
||||
|
||||
/**
 * Builds an in-document anchor (`#heading-<id>` or `#bookmark-<id>`) from a link target given
 * either as a bare id string or as an object with an `id`. Returns null when the id is
 * missing or blank.
 */
function normalizeInternalLinkTarget(prefix: 'heading' | 'bookmark', target: GoogleDocsLinkTarget | string | undefined): string | null {
  const rawId = typeof target === 'string' ? target : target?.id;
  const id = rawId?.trim();
  return id ? `#${prefix}-${id}` : null;
}
|
||||
|
||||
/**
 * Resolves the link destination of a text run.
 *
 * An external `url` wins. Otherwise the first usable internal target is returned as an
 * anchor, tried in this order: `heading`, `headingId`, `bookmark`, `bookmarkId`.
 * Returns null when the run has no usable link.
 */
function resolveLinkHref(element: GoogleDocsParagraphElement): string | null {
  const link = element.textRun?.textStyle?.link;
  const externalUrl = link?.url?.trim();
  if (externalUrl) {
    return externalUrl;
  }

  const candidates: Array<['heading' | 'bookmark', GoogleDocsLinkTarget | string | undefined]> = [
    ['heading', link?.heading],
    ['heading', link?.headingId],
    ['bookmark', link?.bookmark],
    ['bookmark', link?.bookmarkId],
  ];
  for (const [prefix, target] of candidates) {
    const anchor = normalizeInternalLinkTarget(prefix, target);
    if (anchor !== null) {
      return anchor;
    }
  }
  return null;
}
|
||||
|
||||
/**
 * Wraps `code` in a Markdown code span. The fence is one backtick longer than the longest
 * backtick run inside `code`, and a space pads the content when it touches a backtick, so
 * the content never needs escaping.
 */
function inlineCodeSpan(code: string): string {
  const backtickRuns = code.match(/`+/g) ?? [];
  const longestRun = backtickRuns.reduce((longest, run) => Math.max(longest, run.length), 0);
  const fence = '`'.repeat(longestRun + 1);
  const pad = code.startsWith('`') || code.endsWith('`') ? ' ' : '';
  return `${fence}${pad}${code}${pad}${fence}`;
}

/**
 * Renders one paragraph element as inline Markdown.
 *
 * Elements without text become `[Embedded object]` (inline objects), a `---` rule on its own
 * line (page breaks), or an empty string. Text runs get, from the inside out: a code span
 * (font family `Courier New`) or Markdown escaping, then bold, italic, `<u>`, strikethrough,
 * a link, and `<sup>` / `<sub>`.
 *
 * Two details keep the output valid Markdown:
 * - Code-span content is emitted raw, because backslash escapes are literal inside code spans.
 * - Leading and trailing whitespace stays outside the style markers, since `**bold **` is not
 *   parsed as emphasis. Whitespace-only runs are returned unstyled.
 */
function normalizeTextRun(element: GoogleDocsParagraphElement): string {
  const raw = (element.textRun?.content ?? '').replace(/\r/g, '');
  const style = element.textRun?.textStyle;

  if (!raw) {
    if (element.inlineObjectElement) {
      return '[Embedded object]';
    }
    if (element.pageBreak) {
      return '\n---\n';
    }
    return '';
  }

  const core = raw.trim();
  if (!core) {
    // Nothing visible to style; emitting markers around bare whitespace would only add noise.
    return raw;
  }
  const leadingLength = raw.length - raw.trimStart().length;
  const leading = raw.slice(0, leadingLength);
  const trailing = raw.slice(leadingLength + core.length);

  const href = resolveLinkHref(element);
  const isCode = style?.weightedFontFamily?.fontFamily === 'Courier New';
  let text = isCode ? inlineCodeSpan(core) : escapeMarkdownText(core);

  if (style?.bold) {
    text = `**${text}**`;
  }
  if (style?.italic) {
    text = `*${text}*`;
  }
  if (style?.underline) {
    text = `<u>${text}</u>`;
  }
  if (style?.strikethrough) {
    text = `~~${text}~~`;
  }
  if (href) {
    // Escape ")" so it cannot terminate the link destination early.
    text = `[${text}](${href.replace(/\)/g, '\\)')})`;
  }
  if (style?.baselineOffset === 'SUPERSCRIPT') {
    text = `<sup>${text}</sup>`;
  } else if (style?.baselineOffset === 'SUBSCRIPT') {
    text = `<sub>${text}</sub>`;
  }

  return `${leading}${text}${trailing}`;
}
|
||||
|
||||
/**
 * Concatenates the rendered elements of a paragraph into a single line: all newlines are
 * removed and the result is trimmed. Returns '' for a missing or empty paragraph.
 */
function paragraphText(paragraph: GoogleDocsParagraph | undefined): string {
  let combined = '';
  for (const element of paragraph?.elements ?? []) {
    combined += normalizeTextRun(element);
  }
  return combined.replace(/\n/g, '').trim();
}
|
||||
|
||||
/**
 * Maps a Docs named style to a Markdown heading marker.
 *
 * `TITLE` → `#`, `SUBTITLE` → `##`, `HEADING_<n>` → n hashes (capped at 6).
 * Any other style, or a heading level below 1 or not numeric, yields null.
 */
function headingPrefix(namedStyleType: string | undefined): string | null {
  switch (namedStyleType) {
    case 'TITLE':
      return '#';
    case 'SUBTITLE':
      return '##';
    default:
      break;
  }

  const marker = 'HEADING_';
  if (!namedStyleType || !namedStyleType.startsWith(marker)) {
    return null;
  }
  const level = Number.parseInt(namedStyleType.slice(marker.length), 10);
  const isValidLevel = !Number.isNaN(level) && level >= 1;
  return isValidLevel ? '#'.repeat(Math.min(level, 6)) : null;
}
|
||||
|
||||
/**
 * Decides whether a list nesting level is numbered.
 *
 * When `glyphType` is set it alone decides: the level is ordered if the upper-cased type
 * mentions NUMBER, DECIMAL, ALPHA, ROMAN or LATIN. Otherwise the trimmed `glyphSymbol` must
 * be one of `%0.`, `%0)`, `1.` or `1)`.
 */
function isOrderedListLevel(level: { glyphType?: string; glyphSymbol?: string } | undefined): boolean {
  const glyphType = level?.glyphType?.toUpperCase();
  if (glyphType) {
    const orderedKeywords = ['NUMBER', 'DECIMAL', 'ALPHA', 'ROMAN', 'LATIN'];
    return orderedKeywords.some((keyword) => glyphType.includes(keyword));
  }

  const glyphSymbol = level?.glyphSymbol?.trim();
  const orderedSymbols = ['%0.', '%0)', '1.', '1)'];
  return glyphSymbol !== undefined && orderedSymbols.includes(glyphSymbol);
}
|
||||
|
||||
/**
 * Returns the Markdown list marker for a bulleted paragraph, or null when the paragraph is
 * not a list item. The marker is an indent repeated once per nesting level, followed by
 * `1. ` when the list level is ordered and `- ` otherwise.
 */
function listPrefix(paragraph: GoogleDocsParagraph, lists: Record<string, GoogleDocsList> | undefined): string | null {
  const bullet = paragraph.bullet;
  if (!bullet) {
    return null;
  }

  const depth = Math.max(bullet.nestingLevel ?? 0, 0);
  const definition = bullet.listId ? lists?.[bullet.listId] : undefined;
  const levelDefinition = definition?.listProperties?.nestingLevels?.[depth];
  const marker = isOrderedListLevel(levelDefinition) ? '1. ' : '- ';
  return ' '.repeat(depth) + marker;
}
|
||||
|
||||
/**
 * Renders a paragraph as one Markdown block, or null when it has no text.
 *
 * List items take precedence over headings. A heading that carries a `headingId` is preceded
 * by an `<a id="heading-…">` anchor line so internal links can target it.
 */
function paragraphToMarkdown(
  paragraph: GoogleDocsParagraph | undefined,
  lists: Record<string, GoogleDocsList> | undefined,
): string | null {
  const text = paragraphText(paragraph);
  if (!text) {
    return null;
  }

  if (paragraph) {
    const bullet = listPrefix(paragraph, lists);
    if (bullet) {
      return bullet + text;
    }
  }

  const hashes = headingPrefix(paragraph?.paragraphStyle?.namedStyleType);
  if (!hashes) {
    return text;
  }

  const headingLine = `${hashes} ${text}`;
  const anchorId = paragraph?.paragraphStyle?.headingId?.trim();
  if (!anchorId) {
    return headingLine;
  }
  return `<a id="heading-${anchorId}"></a>\n${headingLine}`;
}
|
||||
|
||||
/**
 * Flattens a table cell to a single line of Markdown: newlines inside a block become
 * `<br>`, blocks are joined with ` / `, and pipes are escaped so they cannot end the cell.
 */
function normalizeTableCell(
  cell: GoogleDocsTableCell | undefined,
  lists: Record<string, GoogleDocsList> | undefined,
): string {
  const cellBlocks = normalizeStructuralElements(cell?.content ?? [], lists);
  const singleLine = cellBlocks.map((block) => block.replace(/\n/g, ' <br> ')).join(' / ');
  return singleLine.replace(/\|/g, '\\|').trim();
}
|
||||
|
||||
/** Builds the Markdown header/body divider row (`| --- | --- |`) for `columnCount` columns. */
function markdownTableDivider(columnCount: number): string {
  const dashes = Array.from({ length: columnCount }).map(() => '---');
  return `| ${dashes.join(' | ')} |`;
}
|
||||
|
||||
/**
 * Renders a Docs table as a single Markdown table block.
 *
 * Rows without cells are dropped, shorter rows are padded with empty cells to the widest
 * row, and the first remaining row is used as the header. Returns `[]` when no rows remain,
 * otherwise a one-element array holding the whole table.
 */
function normalizeTable(table: GoogleDocsTable | undefined, lists: Record<string, GoogleDocsList> | undefined): string[] {
  const cellRows: string[][] = [];
  for (const row of table?.tableRows ?? []) {
    const cells = (row.tableCells ?? []).map((cell) => normalizeTableCell(cell, lists));
    if (cells.length > 0) {
      cellRows.push(cells);
    }
  }
  if (cellRows.length === 0) {
    return [];
  }

  const columnCount = cellRows.reduce((widest, cells) => Math.max(widest, cells.length), 0);
  const lines = cellRows.map((cells) => {
    const padded = Array.from({ length: columnCount }, (_, index) => cells[index] ?? '');
    return `| ${padded.join(' | ')} |`;
  });
  // The divider goes right after the header row.
  lines.splice(1, 0, markdownTableDivider(columnCount));
  return [lines.join('\n')];
}
|
||||
|
||||
/**
 * Converts structural elements into Markdown blocks, in document order. An element
 * contributes its paragraph when that renders to text, otherwise its table if it has one;
 * anything else is skipped.
 */
function normalizeStructuralElements(
  elements: GoogleDocsStructuralElement[],
  lists: Record<string, GoogleDocsList> | undefined,
): string[] {
  return elements.flatMap((element) => {
    const paragraphBlock = paragraphToMarkdown(element.paragraph, lists);
    if (paragraphBlock) {
      return [paragraphBlock];
    }
    return element.table ? normalizeTable(element.table, lists) : [];
  });
}
|
||||
|
||||
/**
 * Maps header (or footer) ids from the document style to a display role such as
 * `Default Header`. Ids are trimmed; blank ids are ignored, and when one id fills several
 * roles the first in default → first-page → even-page order wins.
 */
function headerFooterRoleMap(
  label: 'Headers' | 'Footers',
  documentStyle: GoogleDocsDocumentStyle | undefined,
): Map<string, string> {
  const candidates: Array<[string | undefined, string]> =
    label === 'Headers'
      ? [
          [documentStyle?.defaultHeaderId, 'Default Header'],
          [documentStyle?.firstPageHeaderId, 'First Page Header'],
          [documentStyle?.evenPageHeaderId, 'Even Page Header'],
        ]
      : [
          [documentStyle?.defaultFooterId, 'Default Footer'],
          [documentStyle?.firstPageFooterId, 'First Page Footer'],
          [documentStyle?.evenPageFooterId, 'Even Page Footer'],
        ];

  const roleMap = new Map<string, string>();
  for (const [rawId, role] of candidates) {
    const id = rawId?.trim();
    if (id && !roleMap.has(id)) {
      roleMap.set(id, role);
    }
  }
  return roleMap;
}
|
||||
|
||||
/**
 * Renders all headers (or footers) as a `## Headers` / `## Footers` section with one `###`
 * subsection per non-empty entry, ordered by sorted id. Subsections are titled by role when
 * the document style assigns one, otherwise `Header <id>` / `Footer <id>`. Returns null when
 * there are no entries or none of them has content.
 */
function normalizeHeaderFooterMap(
  label: 'Headers' | 'Footers',
  entries: Record<string, GoogleDocsHeaderFooter> | undefined,
  lists: Record<string, GoogleDocsList> | undefined,
  documentStyle: GoogleDocsDocumentStyle | undefined,
): string | null {
  if (!entries) {
    return null;
  }

  const roles = headerFooterRoleMap(label, documentStyle);
  const singularLabel = label.slice(0, -1);
  const sections = Object.keys(entries)
    .sort()
    .flatMap((id) => {
      const blocks = normalizeStructuralElements(entries[id]?.content ?? [], lists);
      if (blocks.length === 0) {
        return [];
      }
      const title = roles.get(id) ?? `${singularLabel} ${escapeMarkdownText(id)}`;
      return [`### ${title}\n\n${blocks.join('\n\n').trim()}`];
    });

  return sections.length === 0 ? null : `## ${label}\n\n${sections.join('\n\n').trim()}`;
}
|
||||
|
||||
/**
 * Joins the sections that contain non-whitespace text with blank lines between them and
 * trims the result. Returns null when no section qualifies.
 */
function joinNonEmptySections(sections: Array<string | null>): string | null {
  const kept: string[] = [];
  for (const section of sections) {
    if (section && section.trim()) {
      kept.push(section);
    }
  }
  return kept.length > 0 ? kept.join('\n\n').trim() : null;
}
|
||||
|
||||
/**
 * Flattens a tab tree depth-first: each tab is immediately followed by all of its
 * descendants. Returns `[]` for a missing or empty list.
 */
function flattenGoogleDocsTabs(tabs: GoogleDocsTab[] | undefined): GoogleDocsTab[] {
  if (!tabs || tabs.length === 0) {
    return [];
  }
  return tabs.flatMap((tab) => [tab, ...flattenGoogleDocsTabs(tab.childTabs)]);
}
|
||||
|
||||
/**
 * Renders one document tab as Markdown: headers, body and footers in that order, under an
 * `# <tab title>` heading when the tab has a title. The tab's own list definitions are used
 * when present, otherwise `fallbackLists`. Returns null when the tab has no content.
 */
function normalizeTab(tab: GoogleDocsTab, fallbackLists: Record<string, GoogleDocsList> | undefined): string | null {
  const documentTab = tab.documentTab;
  const lists = documentTab?.lists ?? fallbackLists;

  const headers = normalizeHeaderFooterMap('Headers', documentTab?.headers, lists, documentTab?.documentStyle);
  const body = normalizeStructuralElements(documentTab?.body?.content ?? [], lists).join('\n\n').trim();
  const footers = normalizeHeaderFooterMap('Footers', documentTab?.footers, lists, documentTab?.documentStyle);

  const content = joinNonEmptySections([headers, body, footers]);
  if (!content) {
    return null;
  }

  const tabTitle = tab.tabProperties?.title?.trim();
  return tabTitle ? [`# ${escapeMarkdownText(tabTitle)}`, content].join('\n\n').trim() : content;
}
|
||||
|
||||
/**
 * Converts a Google Docs document resource to Markdown.
 *
 * When the document has tabs that render to content, the result is those tabs (flattened
 * depth-first) joined by blank lines. Otherwise the top-level headers, body and footers are
 * rendered. Returns an empty string for a document with no content.
 */
export function normalizeGoogleDocToMarkdown(document: GoogleDocsDocument): string {
  const tabSections: string[] = [];
  for (const tab of flattenGoogleDocsTabs(document.tabs)) {
    const rendered = normalizeTab(tab, document.lists);
    if (rendered) {
      tabSections.push(rendered);
    }
  }
  if (tabSections.length > 0) {
    return tabSections.join('\n\n').trim();
  }

  // No usable tabs: fall back to the document-level fields.
  const { lists, documentStyle } = document;
  const body = normalizeStructuralElements(document.body?.content ?? [], lists).join('\n\n').trim();
  const headers = normalizeHeaderFooterMap('Headers', document.headers, lists, documentStyle);
  const footers = normalizeHeaderFooterMap('Footers', document.footers, lists, documentStyle);
  const merged = joinNonEmptySections([headers, body, footers]);
  return merged ?? '';
}
|
||||
Some files were not shown because too many files have changed in this diff Show more
Loading…
Add table
Add a link
Reference in a new issue