diff --git a/docs-site/app/docs/[[...slug]]/page.tsx b/docs-site/app/docs/[[...slug]]/page.tsx index d1ae21d4..dd5d944c 100644 --- a/docs-site/app/docs/[[...slug]]/page.tsx +++ b/docs-site/app/docs/[[...slug]]/page.tsx @@ -39,20 +39,29 @@ export default async function Page(props: { const hero = isHeroPage(params.slug); return ( - + {!hero && ( <> -
+
{page.data.title}
- {page.data.description} + + {page.data.description} + )} - + diff --git a/docs-site/app/layout.tsx b/docs-site/app/layout.tsx index 35a4b1fa..48e12a3f 100644 --- a/docs-site/app/layout.tsx +++ b/docs-site/app/layout.tsx @@ -27,6 +27,10 @@ export const metadata: Metadata = { }, description: "Open-source context infrastructure that makes agentic analytics reliable.", + icons: { + icon: "/brand/ktx-mascot.svg", + shortcut: "/brand/ktx-mascot.svg", + }, }; export default function RootLayout({ children }: { children: ReactNode }) { diff --git a/docs-site/components/product-mechanics.tsx b/docs-site/components/product-mechanics.tsx new file mode 100644 index 00000000..7f551450 --- /dev/null +++ b/docs-site/components/product-mechanics.tsx @@ -0,0 +1,402 @@ +import type { ReactNode } from "react"; + +const sourceInputs = [ + { + name: "Warehouse schema", + detail: "tables, columns, types, constraints, row counts", + signal: "grounds definitions in live database structure", + accent: "border-fd-primary", + }, + { + name: "Metabase and query history", + detail: "historic SQL, questions, dashboards, usage patterns", + signal: "extracts joins, filters, grain, and trusted examples", + accent: "border-orange-500", + }, + { + name: "dbt, MetricFlow, LookML", + detail: "models, metrics, dimensions, explores, joins", + signal: "maps existing modeling logic into semantic entities", + accent: "border-amber-500", + }, + { + name: "Company documentation", + detail: "Notion pages, policies, caveats, analyst notes", + signal: "links business language back to semantic references", + accent: "border-slate-500 dark:border-cyan-200", + }, +]; + +const ingestSteps = [ + { + title: "extract evidence", + body: "Pull structured facts from schemas, SQL, BI metadata, and docs.", + }, + { + title: "reconcile entities", + body: "Merge names, measures, joins, and caveats into one project model.", + }, + { + title: "validate references", + body: "Check semantic fields and joins against database 
context before agents use them.", + }, +]; + +const artifacts = [ + { + path: "semantic-layer/*.yaml", + title: "Typed query model", + body: "sources, grain, joins, dimensions, measures, filters, segments", + }, + { + path: "wiki/*.md", + title: "Business context", + body: "rules and caveats with sl_refs back to semantic-layer entities", + }, + { + path: "raw-sources/", + title: "Evidence trail", + body: "scan artifacts, extracted metadata, relationship evidence", + }, + { + path: ".ktx/", + title: "Local indexes", + body: "embeddings and search indexes, not the source of truth", + }, +]; + +const runtimeSteps = [ + { + title: "Search wiki", + body: "Find business rules, caveats, synonyms, and sl_refs.", + }, + { + title: "Resolve semantic refs", + body: "Map measure and dimension names to approved entities.", + }, + { + title: "Validate fields", + body: "Check source, columns, joins, grain, filters, and segments.", + }, + { + title: "Build query plan", + body: "Create a semantic query plan before SQL is generated.", + }, + { + title: "Compile dialect SQL", + body: "Generate warehouse-shaped SQL instead of copying examples.", + }, + { + title: "Execute with bounds", + body: "Optionally run with bounded rows and return provenance.", + }, +]; + +export function ProductMechanics() { + return ( +
+
+

+ Product mechanics +

+

+ A semantic compiler for analytics agents +

+

+ KTX builds typed semantic files, links wiki context back to those + entities, validates the model against database evidence, then compiles + agent requests into executable SQL. +

+
+ +
+ + +
+
+ ); +} + +function IngestionDiagram() { + return ( +
+ + +
+
+ Inputs KTX reads +
+ {sourceInputs.map((source) => ( +
+

+ {source.name} +

+

+ {source.detail} +

+

+ {source.signal} +

+
+ ))} +
+
+ +
+ KTX builds the model +
+
+

+ Ingest pipeline +

+
    + {ingestSteps.map((step, index) => ( + + ))} +
+
+ +
+ {artifacts.map((artifact) => ( + + ))} +
+
+
+
+
+ ); +} + +function RuntimeDiagram() { + return ( +
+ + +
+
+ Agent sends + +
connection: warehouse
+
measure: orders.total_revenue
+
dimension: customers.segment
+
filter: orders.created_date >= '2024-01-01'
+
+

+ This is the API surface agents should use: compact semantic intent, + not hand-written warehouse SQL. +

+
+ +
+ KTX planning and execution +
    + {runtimeSteps.map((step, index) => ( + + ))} +
+
+
+ +
+
+ Semantic query plan +
+

+ source:{" "} + orders joined to customers as many_to_one +

+

+ measure:{" "} + total_revenue = sum(amount) with refund filter +

+

+ grain: segment + group-by with date predicate +

+

+ result: dialect + SQL, bounded rows, and provenance +

+
+
+ +
+ KTX returns + +
select
+
customers.segment,
+
sum(orders.amount) as total_revenue
+
from analytics.orders
+
join analytics.customers
+
on orders.customer_id = customers.id
+
where orders.status != 'refunded'
+
and orders.created_date >= '2024-01-01'
+
group by 1
+
+

+ The output can be SQL-only or executed results with provenance, so + the agent can show where the answer came from. +

+
+
+
+ ); +} + +function DiagramHeader({ + body, + eyebrow, + id, + title, +}: { + body: string; + eyebrow: string; + id: string; + title: string; +}) { + return ( +
+

+ {eyebrow} +

+

+ {title} +

+

+ {body} +

+
+ ); +} + +function Artifact({ + body, + path, + title, +}: { + body: string; + path: string; + title: string; +}) { + return ( +
+

+ {path} +

+

{title}

+

+ {body} +

+
+ ); +} + +function PipelineStep({ + body, + dark = false, + index, + title, +}: { + body: string; + dark?: boolean; + index: number; + title: string; +}) { + return ( +
  • + + {index} + + + + {title} + + + {body} + + +
  • + ); +} + +function ColumnLabel({ children }: { children: ReactNode }) { + return ( +

    + {children} +

    + ); +} + +function CodeBox({ children }: { children: ReactNode }) { + return ( +
    +
    {children}
    +
    + ); +} diff --git a/docs-site/content/docs/cli-reference/ktx-ingest.mdx b/docs-site/content/docs/cli-reference/ktx-ingest.mdx index a0bca58f..ab907992 100644 --- a/docs-site/content/docs/cli-reference/ktx-ingest.mdx +++ b/docs-site/content/docs/cli-reference/ktx-ingest.mdx @@ -29,14 +29,16 @@ connections when you use `--all`. | `--deep` | Use AI-enriched database ingest | Stored connection default, or `fast` | | `--query-history` | Include database query-history usage patterns | Stored connection default | | `--no-query-history` | Skip database query-history usage patterns for this run | Stored connection default | -| `--query-history-window-days ` | Query-history lookback window for this run | Stored connection default | +| `--query-history-window-days ` | BigQuery/Snowflake query-history lookback window for this run | Stored connection default | | `--plain` | Print plain text output | `true` | | `--json` | Print JSON output | `false` | | `--no-input` | Disable interactive terminal input | — | `--fast` and `--deep` are mutually exclusive. Depth flags apply only to database connections. Query-history flags apply only to database connections -that support query history. Query-history ingest runs after schema ingest and +that support query history. The window flag applies to BigQuery and Snowflake; +Postgres reads the current `pg_stat_statements` aggregate data instead of a +time-windowed history table. Query-history ingest runs after schema ingest and requires deep ingest readiness. 
When `--all` selects both databases and context sources, database ingest runs @@ -70,6 +72,7 @@ ktx ingest warehouse --deep # Include query-history usage patterns ktx ingest warehouse --deep --query-history +# Set the lookback window for BigQuery or Snowflake query history ktx ingest warehouse --query-history-window-days 30 # Build a source connection diff --git a/docs-site/content/docs/cli-reference/ktx-setup.mdx b/docs-site/content/docs/cli-reference/ktx-setup.mdx index 4de40ecb..90d0b175 100644 --- a/docs-site/content/docs/cli-reference/ktx-setup.mdx +++ b/docs-site/content/docs/cli-reference/ktx-setup.mdx @@ -96,13 +96,16 @@ incomplete. |------|-------------| | `--enable-query-history` | Enable query-history ingest when the selected database supports it | | `--disable-query-history` | Disable query-history ingest for the selected database | -| `--query-history-window-days <days>` | Query-history lookback window | +| `--query-history-window-days <days>` | BigQuery/Snowflake query-history lookback window | | `--query-history-min-executions <count>` | Minimum executions for a query-history template | | `--query-history-service-account-pattern <regex>` | Query-history service-account regex; repeatable | | `--query-history-redaction-pattern <regex>` | Query-history SQL-literal redaction regex; repeatable | -Query history setup is supported for Postgres, BigQuery, and Snowflake. Enabling -query history makes deep ingest readiness matter for later `ktx ingest` runs. +Query history setup is supported for Postgres, BigQuery, and Snowflake. The +window flag applies to BigQuery and Snowflake; Postgres reads the current +`pg_stat_statements` aggregate data instead of a time-windowed history table. +Once query history is enabled, later `ktx ingest` runs require deep ingest +readiness. 
### Context Sources diff --git a/docs-site/content/docs/concepts/meta.json b/docs-site/content/docs/concepts/meta.json index e1749365..72c0a407 100644 --- a/docs-site/content/docs/concepts/meta.json +++ b/docs-site/content/docs/concepts/meta.json @@ -1,5 +1,5 @@ { "title": "Concepts", "defaultOpen": true, - "pages": ["the-context-layer", "context-as-code"] + "pages": ["the-context-layer", "semantic-layer-internals", "context-as-code"] } diff --git a/docs-site/content/docs/concepts/semantic-layer-internals.mdx b/docs-site/content/docs/concepts/semantic-layer-internals.mdx new file mode 100644 index 00000000..c48428e6 --- /dev/null +++ b/docs-site/content/docs/concepts/semantic-layer-internals.mdx @@ -0,0 +1,398 @@ +--- +title: Semantic Layer Internals +description: How KTX uses join graphs, grain, and relationship metadata to turn context into safe SQL. +--- + +KTX is a context layer for agents. This page focuses on one internal subsystem: +the semantic execution layer that turns reviewed context into safe SQL. + +The semantic layer is important, but it is not the whole product. KTX also +handles schema evidence, wiki context, provenance, validation, and agent +workflows around those files. + +Read the page as a pipeline: + +- context inputs feed the semantic engine; +- evidence becomes a join graph with grain and relationship metadata; +- review and corrections keep that graph current; +- the execution engine uses the graph to avoid fan-out and ambiguous joins. + +## Where the semantic layer fits + +The semantic layer is not a separate product category inside KTX. It is the +engine that makes the rest of the context actionable for SQL generation. + +
    +
    +
    +

    + {"Context inputs"} +

    +
    +
    +

    semantic-layer/

    +

    + {"source YAML, measures, joins, grain"} +

    +
    +
    +

    wiki/

    +

    + {"business rules, definitions, caveats"} +

    +
    +
    +

    raw-sources/

    +

    + {"schema scans, keys, imported metadata"} +

    +
    +
    +

    provenance

    +

    + {"ingest decisions and review history"} +

    +
    +
    +
    + + + +
    +
    +

    + {"Semantic layer engine"} +

    +
    +
    +

    Join graph

    +

    + {"sources as nodes, joins as typed edges"} +

    +
    +
    +

    Grain

    +

    + {"row identity before aggregation"} +

    +
    +
    +

    Measures

    +

    + {"verified formulas and filters"} +

    +
    +
    +

    Relationships

    +

    + {"many_to_one, one_to_many, one_to_one"} +

    +
    +
    +
    + {"Safe query planning before SQL is generated."} +
    +
    + + + +
    +

    + {"Agent workflows"} +

    +
    +
    + {"Search sources and wiki pages"} +
    +
    + {"Compile trusted SQL"} +
    +
    + {"Explain metrics and provenance"} +
    +
    + {"Patch files and validate review"} +
    +
    +
    +
    +
    + +## The join graph KTX builds + +A semantic source is a node. A join is an edge with a join condition and a +relationship type. The graph lets KTX choose valid paths, reject unsafe paths, +and reason about whether a join preserves or multiplies rows before SQL is +generated. + +- `many_to_one` paths are usually safe for adding dimensions. +- `one_to_many` paths can multiply fact rows and trigger fan-out handling. +- Equal-cost paths can be ambiguous, so aliases and explicit joins matter. + +
    +
    +
    +

    customers

    +

    grain: customer_id

    +
    +
    +

    orders

    +

    grain: order_id

    +
    +
    +

    order_items

    +

    grain: order_id, line_id

    +
    +
    +
    +
    orders -> customers: many_to_one
    +
    orders -> order_items: one_to_many
    +
    +
    + {"Example: "} + {"refunds joins to orders. Used carefully, it explains net revenue. Joined naively, it can duplicate order-level measures."} +
    +
    + +The graph is bidirectional for planning. If `orders -> customers` is +`many_to_one`, the reverse path is `one_to_many`; KTX keeps that distinction +instead of treating every join as a neutral edge. + +## How KTX builds the graph + +KTX starts from evidence, not a blank modeling canvas. Database scans and +analytics-tool imports create source definitions that an analyst can review. + +| Evidence | What it contributes | +|---|---| +| Declared primary keys | Initial row grain for each source | +| Declared foreign keys | Formal join candidates and relationship direction | +| Inferred relationships | Useful edges when warehouses lack constraints | +| dbt, MetricFlow, and LookML imports | Existing metrics, dimensions, entities, explores, and joins | +| Query history | Real join and filter patterns agents should respect | +| Analyst review | The final authority before context is merged | + +Generated YAML is intentionally reviewable. KTX can draft joins and measures, +but the accepted semantic layer is still the plain-file diff your team approves. + +## How KTX keeps the graph current + +The semantic layer changes as schemas, metrics, and business rules change. KTX +keeps that loop explicit instead of hiding it behind a remote runtime. + +
    +
    +

    + {"Semantic maintenance loop"} +

    +

    + {"Every accepted correction becomes input to the next graph build."} +

    +
    +
    +
    +
    + + +
    +

    + {"reviewed context"} +

    +

    + {"The accepted graph becomes the starting point for the next build."} +

    +
    + +
    +

    + {"Step 1"} +

    +

    {"ingest evidence"}

    +

    + {"scan schemas, imports, and accepted files"} +

    +
    +
    +

    + {"Step 2"} +

    +

    {"YAML diff"}

    +

    + {"draft source, join, grain, and measure changes"} +

    +
    +
    +

    + {"Step 3"} +

    +

    {"validation"}

    +

    + {"check relationships, syntax, and unsafe query shapes"} +

    +
    +
    +

    + {"Step 4"} +

    +

    {"analyst review"}

    +

    + {"accept, edit, or reject generated context"} +

    +
    +
    +

    + {"Step 5"} +

    +

    {"agent use"}

    +

    + {"serve context to search, explain, and query"} +

    +
    +
    +

    + {"Step 6"} +

    +

    {"corrections"}

    +

    + {"agent and analyst fixes become new evidence"} +

    +
    +
    +
    +
    +
    + +This matters because semantic correctness is not static. If a source gains a +new key, a metric changes definition, or an analyst corrects a relationship, +the next agent gets that reviewed context. + +## The modeling problem the graph solves + +Fan-out is the classic failure mode. If an order-level measure is joined to +line-item rows before aggregation, one order can become many rows and revenue +can be counted more than once. + +| Problem | What happens | How KTX avoids it | +|---|---|---| +| Order measure joins to `order_items` | `orders.revenue` repeats once per item | Detect the `one_to_many` path and pre-aggregate the order measure | +| Two independent fact sources share `customers` | Measures from each fact table multiply across the shared dimension | Treat it as a chasm trap and use aggregate-locality planning | +| Filter lives only across a `one_to_many` path | Filtering after the join changes the measure grain | Reject or localize the filter instead of silently producing unsafe SQL | +| Multiple equal-cost paths connect the same sources | The join path is ambiguous | Prefer safer paths and use aliases to disambiguate repeated joins | + +Many-to-many questions usually show up as multiple one-to-many paths or +independent fact sources. KTX treats those shapes as fan-out or chasm risks +unless the query can be planned at a safe grain. + +## How the execution engine uses the graph + +The planner resolves the sources in a semantic query, chooses a join tree, and +checks whether any requested dimension or filter crosses a row-multiplying +edge. The SQL generator then chooses the simple path or the aggregate-locality +path. 
+ +| Naive SQL shape | Semantic-layer SQL shape | +|---|---| +| Join facts and dimensions first, then aggregate | Aggregate each fact source at its own grain, then join the results | +| Put every filter in one outer `WHERE` clause | Keep measure filters with the measure source when locality is needed | +| Trust the shortest textual join path | Prefer safe relationship paths and reject disconnected sources | +| Let dimension grain differ across facts | Raise when asymmetric dimensions would fan out another measure | + +
    +
    +
    +

    + {"Unsafe shape"} +

    +
    +{`orders
    +  join order_items
    +  join customers
    +group by customer_segment
    +sum(orders.amount)`}
    +      
    +

    + {"The order measure is exposed to line-item fan-out before aggregation."} +

    +
    +
    +

    + {"KTX shape"} +

    +
    +{`orders_agg as (
    +  select customer_id, sum(amount) revenue
    +  from orders
    +  group by customer_id
    +)
    +select customers.segment, sum(revenue)
    +from orders_agg
    +join customers`}
    +      
    +

    + {"KTX pre-aggregates fact measures at their own grain before joining dimensions."} +

    +
    +
    +
    + +The result is not magic. It is structured planning: validated sources, typed +relationships, graph search, fan-out detection, aggregate locality, and final +dialect transpilation. + +## What this means for agents + +KTX gives agents a semantic surface they can inspect and improve, not just a +folder of notes. + +- Search semantic sources and related wiki pages before writing SQL. +- Compile SQL through `ktx sl query` instead of guessing joins. +- Validate semantic-layer changes before review. +- Patch YAML and Markdown files in git. +- Explain metric meaning and provenance from the same accepted context. + +Next, read [Writing Context](/docs/guides/writing-context) for the YAML editing +workflow or [ktx sl](/docs/cli-reference/ktx-sl) for the command reference. diff --git a/docs-site/content/docs/concepts/the-context-layer.mdx b/docs-site/content/docs/concepts/the-context-layer.mdx index cb03b7c0..ba7ee3f3 100644 --- a/docs-site/content/docs/concepts/the-context-layer.mdx +++ b/docs-site/content/docs/concepts/the-context-layer.mdx @@ -191,7 +191,18 @@ KTX organizes context into four pillars: Each pillar covers a different kind of context agents need before they can safely write SQL, update semantic definitions, or explain an analytics result. -**Semantic sources** are YAML definitions that describe your data in terms agents can reason about. Each source maps to a table or SQL query, declares its grain, defines typed columns, specifies valid joins, and exposes named measures with optional filters. This is where "revenue means `sum(amount)` excluding refunds" lives. +**Semantic sources** are YAML definitions that describe your data in terms +agents can reason about: + +- source tables or SQL queries; +- row grain; +- typed columns; +- valid joins; +- named measures, filters, and segments. + +This is where "revenue means `sum(amount)` excluding refunds" lives. 
For the +join graph, fan-out protections, and execution mechanics, read +[Semantic Layer Internals](/docs/concepts/semantic-layer-internals). ```yaml name: orders @@ -289,7 +300,7 @@ my-project/ │ └── data-quality-notes.md ├── raw-sources/ │ └── warehouse/ -│ └── database-ingest/ # Schema ingest artifacts and reports +│ └── live-database/ # Schema ingest artifacts and reports └── .ktx/ ├── db.sqlite # Local state (git-ignored) └── cache/ # Runtime cache (git-ignored) diff --git a/docs-site/content/docs/getting-started/introduction.mdx b/docs-site/content/docs/getting-started/introduction.mdx index cb8ac0dd..7a6c9b3e 100644 --- a/docs-site/content/docs/getting-started/introduction.mdx +++ b/docs-site/content/docs/getting-started/introduction.mdx @@ -3,10 +3,12 @@ title: Introduction description: How KTX gives analytics agents trusted context for warehouse work. --- -
    -
    +import { ProductMechanics } from "@/components/product-mechanics"; + +
    +

    - Make analytics context{'\n'}usable by agents + Make analytics context usable by agents

    -

    - KTX turns warehouse metadata, semantic definitions, and business knowledge - into reviewable project files that agents can use while planning, querying, - and updating analytics work. +

    + {'KTX turns warehouse metadata, semantic definitions, and business knowledge into reviewable project files that agents can use while planning, querying, and updating analytics work.'}

    -
    -## Who KTX is for + + +## What agents can do with KTX KTX is built for analytics engineers and data teams who want data agents to -work on real analytics systems - not just generate one-off SQL. +work on real analytics systems, not just generate one-off SQL. -Use KTX when you want agents to: +Use it when agents need to: -- **Generate SQL** from approved measures and joins -- **Repair semantic definitions** through reviewable diffs -- **Explain metric provenance** with warehouse evidence -- **Work alongside** dbt, LookML, MetricFlow, Looker, Metabase, and modern BI platforms +- **Generate SQL** from approved measures, dimensions, joins, and filters +- **Explain provenance** with wiki context and warehouse evidence +- **Repair context** through reviewable YAML and Markdown diffs +- **Work alongside** dbt, LookML, MetricFlow, Looker, Metabase, and warehouses -Works with PostgreSQL, Snowflake, BigQuery, ClickHouse, MySQL, and SQL Server. +KTX works with SQLite, PostgreSQL, Snowflake, BigQuery, ClickHouse, MySQL, and +SQL Server. -## Explore the docs +## Read next Set up KTX and build your first context in under 10 minutes. - - Understand what a context layer is and why agents need one. - Hands-on workflows for scanning, ingesting, writing, and serving. + + Edit semantic-layer YAML and wiki Markdown safely. + Complete flag and subcommand reference for every KTX command. diff --git a/docs-site/content/docs/getting-started/quickstart.mdx b/docs-site/content/docs/getting-started/quickstart.mdx index 335aedfa..84bf4611 100644 --- a/docs-site/content/docs/getting-started/quickstart.mdx +++ b/docs-site/content/docs/getting-started/quickstart.mdx @@ -51,8 +51,8 @@ For scripted setup, pass the project directory explicitly: ktx setup --project-dir ./analytics ``` -If setup exits early, rerun `ktx setup` in the same directory. KTX tracks -completed setup steps and resumes from the remaining work. +If setup exits early, rerun `ktx setup` in the same directory. 
KTX keeps local +setup progress under `.ktx/setup/` and resumes from the remaining work. ## Step 2: Configure the LLM @@ -122,7 +122,8 @@ Database ready PostgreSQL, BigQuery, and Snowflake can also enable query-history ingest. Query history helps KTX learn common query patterns, joins, service-account filters, -and warehouse-specific usage. +and warehouse-specific usage. BigQuery and Snowflake support a lookback window; +Postgres reads the current `pg_stat_statements` aggregate data instead. ## Step 5: Add context sources @@ -200,7 +201,7 @@ KTX writes plain files so people and agents can inspect changes in git. | Path | Purpose | |------|---------| -| `ktx.yaml` | Project configuration for LLMs, embeddings, connections, context sources, and setup state | +| `ktx.yaml` | Project configuration for LLMs, embeddings, connections, context sources, and query-history settings | | `.ktx/secrets/*` | Local secret files referenced from `ktx.yaml`; do not commit these | | `.ktx/setup/*` | Local setup and context-build state | | `.ktx/agents/install-manifest.json` | Manifest used to manage installed agent files | diff --git a/docs-site/content/docs/guides/building-context.mdx b/docs-site/content/docs/guides/building-context.mdx index c21b7921..5fd288a6 100644 --- a/docs-site/content/docs/guides/building-context.mdx +++ b/docs-site/content/docs/guides/building-context.mdx @@ -62,13 +62,15 @@ configured, run `ktx setup` or use `--fast`. PostgreSQL, BigQuery, and Snowflake can add query-history context. This helps KTX learn common joins, filters, service-account patterns, redaction rules, and -usage-heavy query templates. +usage-heavy query templates. BigQuery and Snowflake support a lookback window; +Postgres reads the current `pg_stat_statements` aggregate data instead. 
Enable it during setup, store it under `connections.<connection-id>.context.queryHistory`, or request it for one run: ```bash ktx ingest warehouse --deep --query-history +# Set the lookback window for BigQuery or Snowflake query history ktx ingest warehouse --query-history-window-days 30 ``` diff --git a/docs-site/content/docs/guides/writing-context.mdx b/docs-site/content/docs/guides/writing-context.mdx index fe9d3fdb..b68960bf 100644 --- a/docs-site/content/docs/guides/writing-context.mdx +++ b/docs-site/content/docs/guides/writing-context.mdx @@ -60,21 +60,25 @@ semantic-layer/<connection-id>/<source-name>.yaml ```yaml name: orders -description: Customer orders with booked revenue. +descriptions: + user: Customer orders with booked revenue. table: public.orders grain: - order_id columns: - name: order_id type: string - description: Unique order identifier. + descriptions: + user: Unique order identifier. - name: order_date type: time role: time - description: Date the order was placed. + descriptions: + user: Date the order was placed. - name: total_amount type: number - description: Booked order value in USD. + descriptions: + user: Booked order value in USD. measures: - name: total_revenue expr: SUM(total_amount) @@ -85,7 +89,8 @@ measures: ```yaml name: orders -description: Customer orders with line-item totals. +descriptions: + user: Customer orders with line-item totals. table: public.orders grain: - order_id @@ -93,26 +98,31 @@ grain: columns: - name: order_id type: string - description: Unique order identifier. + descriptions: + user: Unique order identifier. - name: order_date type: time role: time - description: Date the order was placed. + descriptions: + user: Date the order was placed. - name: status type: string visibility: public - description: Current order status. + descriptions: + user: Current order status. - name: _etl_loaded_at type: time visibility: hidden - description: Internal load timestamp. + descriptions: + user: Internal load timestamp. 
- name: total_amount type: number - description: Order total in USD. + descriptions: + user: Order total in USD. measures: - name: total_revenue @@ -149,9 +159,10 @@ joins: | Field | Required | Description | |-------|----------|-------------| | `name` | Yes | Source identifier. Use lowercase words and underscores. | +| `descriptions` | No | Description map keyed by source, such as `user`, `dbt`, or `ai`. | | `table` or `sql` | Yes | Database table or custom SQL expression. Use exactly one. | | `grain` | Yes | Columns that uniquely identify a row at the source grain. | -| `columns` | No | Column definitions with type, role, visibility, and descriptions. | +| `columns` | Yes | Non-empty column definitions with type, role, visibility, and descriptions. | | `measures` | No | Aggregation expressions such as `SUM`, `COUNT`, and `AVG`. | | `segments` | No | Named predicates agents can reuse. | | `joins` | No | Relationships to other semantic sources. | @@ -165,7 +176,7 @@ joins: | Column | `type` | Yes | Agent-facing type: `string`, `number`, `time`, or `boolean`. | | Column | `role` | No | Special role such as `time` for default time dimensions. | | Column | `visibility` | No | `public`, `internal`, or `hidden`. | -| Column | `description` | Strongly recommended | Business meaning and usage notes. | +| Column | `descriptions` | Strongly recommended | Description map keyed by source, such as `user`, `dbt`, or `ai`. | | Measure | `name` | Yes | Queryable metric name. | | Measure | `expr` | Yes | SQL aggregation expression at the source grain. | | Measure | `filter` | No | SQL predicate applied only to this measure. 
| diff --git a/docs-site/content/docs/integrations/agent-clients.mdx b/docs-site/content/docs/integrations/agent-clients.mdx index de628197..01cbbca5 100644 --- a/docs-site/content/docs/integrations/agent-clients.mdx +++ b/docs-site/content/docs/integrations/agent-clients.mdx @@ -75,7 +75,7 @@ Available commands: - `ktx status --json --project-dir /path/to/project` - `ktx sl list --json --project-dir /path/to/project` - `ktx sl search '<query>' --json --project-dir /path/to/project --connection-id '<connection-id>'` -- `ktx sl query --json --project-dir /path/to/project --connection-id '<connection-id>' --query-file '<query-file>' --execute --max-rows 100` +- `ktx sl query --project-dir /path/to/project --connection-id '<connection-id>' --query-file '<query-file>' --format json --execute --max-rows 100` - `ktx wiki search '<query>' --json --project-dir /path/to/project --limit 10` ``` @@ -172,7 +172,7 @@ All supported agent clients call the same KTX CLI commands: | `ktx sl list --json` | List semantic-layer sources | | `ktx sl search --json` | Search semantic-layer sources | | `ktx sl validate --connection-id <connection-id>` | Validate semantic source definitions | -| `ktx sl query --json` | Execute a semantic-layer query when semantic compute is configured | +| `ktx sl query --format json` | Execute a semantic-layer query when semantic compute is configured | ### Security constraints diff --git a/docs-site/content/docs/integrations/index.mdx b/docs-site/content/docs/integrations/index.mdx index 8f77a624..92a677aa 100644 --- a/docs-site/content/docs/integrations/index.mdx +++ b/docs-site/content/docs/integrations/index.mdx @@ -34,8 +34,9 @@ automation flags documented in [`ktx setup`](/docs/cli-reference/ktx-setup). 
| Path | Purpose | |------|---------| -| `ktx.yaml` | Main project configuration for providers, embeddings, connections, source mappings, query history, and setup state | +| `ktx.yaml` | Main project configuration for providers, embeddings, connections, source mappings, and query history | | `.ktx/secrets/*` | Local file-backed secrets when you choose file references during setup | +| `.ktx/setup/*` | Local setup progress and context-build state | | `semantic-layer/<connection-id>/` | YAML semantic sources generated by database and source ingestion | | `wiki/` | Markdown business context, definitions, and ingested knowledge | | `.ktx/agents/install-manifest.json` | Manifest of agent integration files installed by `ktx setup --agents` | diff --git a/docs-site/content/docs/integrations/primary-sources.mdx b/docs-site/content/docs/integrations/primary-sources.mdx index a3d4db29..00cc39aa 100644 --- a/docs-site/content/docs/integrations/primary-sources.mdx +++ b/docs-site/content/docs/integrations/primary-sources.mdx @@ -228,7 +228,7 @@ mapping metadata. The BigQuery connector still authenticates with the | Feature | Supported | Notes | |---------|-----------|-------| | Tables & views | Yes | Including materialized views and external tables | -| Primary keys | No | - | +| Primary keys | Yes | Via `INFORMATION_SCHEMA` table constraints when declared | | Foreign keys | No | Not available in BigQuery | | Row count estimates | Yes | From table metadata | | Column statistics | No | - | @@ -500,7 +500,7 @@ No authentication required - SQLite is file-based. 
The file must be readable by - Uses `LIMIT X OFFSET Y` for pagination - SQLite type affinity system: `TEXT`, `NUMERIC`, `INTEGER`, `REAL`, `BLOB` - Foreign key enforcement requires explicit `PRAGMA foreign_keys = ON` -- In-memory databases supported with `path: ":memory:"` (for testing) +- Database file must exist before `ktx connection test` or ingest runs ## Common errors diff --git a/docs-site/tests/product-mechanics-content.test.mjs b/docs-site/tests/product-mechanics-content.test.mjs new file mode 100644 index 00000000..6992d9a1 --- /dev/null +++ b/docs-site/tests/product-mechanics-content.test.mjs @@ -0,0 +1,86 @@ +import assert from "node:assert/strict"; +import { readFile } from "node:fs/promises"; +import { dirname, join } from "node:path"; +import { test } from "node:test"; +import { fileURLToPath } from "node:url"; + +const docsSiteDir = join(dirname(fileURLToPath(import.meta.url)), ".."); + +async function readDocsFile(path) { + return readFile(join(docsSiteDir, path), "utf8"); +} + +test("docs introduction shows the ingestion and runtime mechanics early", async () => { + const introduction = await readDocsFile( + "content/docs/getting-started/introduction.mdx", + ); + + assert.match( + introduction, + /import\s+\{\s*ProductMechanics\s*\}\s+from\s+"@\/components\/product-mechanics";/, + ); + assert.match(introduction, /<ProductMechanics \/>/); + + const heroIndex = introduction.indexOf("Make analytics context"); + const mechanicsIndex = introduction.indexOf("<ProductMechanics />"); + const useCaseIndex = introduction.indexOf("## What agents can do with KTX"); + const heroSource = introduction.slice(0, mechanicsIndex); + + assert.ok(heroIndex >= 0, "introduction should include the custom hero"); + assert.ok( + mechanicsIndex > heroIndex, + "mechanics component should appear after the hero", + ); + assert.ok( + mechanicsIndex < useCaseIndex, + "mechanics component should appear before use-case sections", + ); + assert.doesNotMatch(heroSource, /Get Started/); + assert.doesNotMatch(heroSource, 
/The Context Layer/); + assert.doesNotMatch(heroSource, /Building Context/); + assert.doesNotMatch(heroSource, /flex flex-wrap gap-3/); +}); + +test("product mechanics component covers source-specific context and SQL expansion", async () => { + const component = await readDocsFile("components/product-mechanics.tsx"); + + for (const expectedText of [ + "A semantic compiler for analytics agents", + "Ingestion", + "Runtime", + "wiki/", + "semantic-layer/", + "raw-sources/", + ".ktx/", + "sl_refs", + "Company documentation", + "Notion pages", + "Metabase", + "query history", + "extract evidence", + "reconcile entities", + "validate references", + "semantic query plan", + "dialect SQL", + "bounded rows", + "provenance", + "measure: orders.total_revenue", + "dimension: customers.segment", + "select", + ]) { + assert.ok( + component.includes(expectedText), + `component should include: ${expectedText}`, + ); + } + + assert.doesNotMatch(component, /KTX does more than retrieve Markdown/); + assert.doesNotMatch(component, /Plain Markdown \+ RAG/); + assert.doesNotMatch(component, /comparisonRows/); + assert.doesNotMatch(component, /ComparisonTable/); + assert.doesNotMatch(component, /Not just retrieval/); + assert.doesNotMatch(component, /KTX works in two moments/); + assert.doesNotMatch(component, /w-\[calc\(100vw/); + assert.doesNotMatch(component, /xl:grid-cols-2/); + assert.doesNotMatch(component, /lg:grid-cols-\[[^\]]*_2rem_/); +}); diff --git a/docs/superpowers/plans/2026-05-15-semantic-layer-docs.md b/docs/superpowers/plans/2026-05-15-semantic-layer-docs.md new file mode 100644 index 00000000..59e5d5bf --- /dev/null +++ b/docs/superpowers/plans/2026-05-15-semantic-layer-docs.md @@ -0,0 +1,328 @@ +# Semantic Layer Docs Implementation Plan + +> **For agentic workers:** REQUIRED SUB-SKILL: Use superpowers:subagent-driven-development (recommended) or superpowers:executing-plans to implement this plan task-by-task. Steps use checkbox (`- [x]`) syntax for tracking. 
+ +**Goal:** Add a standalone, scannable Concepts page that explains the semantic-layer internals while positioning KTX as a broader context layer. + +**Architecture:** Implement this as docs-only MDX content inside the existing Fumadocs tree. The new page uses inline MDX diagrams and Fumadocs color tokens, matching the custom diagram pattern already used in `the-context-layer.mdx`. + +**Tech Stack:** MDX, Fumadocs content, Next.js docs site, pnpm workspace commands. + +--- + +### Task 1: Add Concepts Navigation Entry + +**Files:** +- Modify: `docs-site/content/docs/concepts/meta.json` + +- [x] **Step 1: Update the Concepts page order** + +Replace the `pages` array with: + +```json +{ + "title": "Concepts", + "defaultOpen": true, + "pages": ["the-context-layer", "semantic-layer-internals", "context-as-code"] +} +``` + +- [x] **Step 2: Verify JSON parses** + +Run: + +```bash +node -e "JSON.parse(require('node:fs').readFileSync('docs-site/content/docs/concepts/meta.json', 'utf8')); console.log('concepts meta ok')" +``` + +Expected output: + +```text +concepts meta ok +``` + +### Task 2: Create the Semantic Layer Internals Page + +**Files:** +- Create: `docs-site/content/docs/concepts/semantic-layer-internals.mdx` + +- [x] **Step 1: Add frontmatter and opening positioning** + +Create the page with this frontmatter and opening section: + +```mdx +--- +title: Semantic Layer Internals +description: How KTX uses join graphs, grain, and relationship metadata to turn context into safe SQL. +--- + +KTX is a context layer for agents. Its semantic layer is the query-planning core +that turns reviewed context into safe SQL. + +Use this page to understand the mechanics behind KTX's semantic execution: +the join graph, how KTX builds and maintains it, and how that graph prevents +classic analytics errors like fan-out and ambiguous join paths. 
+ +| KTX is | KTX is not just | +|---|---| +| A context layer for agents | A metric definition store | +| A system for ingesting, reviewing, and serving analytics context | A markdown saver | +| A semantic execution layer plus wiki pages, scans, provenance, and agent workflows | A replacement for every BI semantic layer | +``` + +- [x] **Step 2: Add the system-fit diagram** + +Add a `Where the semantic layer fits` section with a custom `not-prose` diagram. +The diagram must show: + +```text +Context inputs -> Semantic layer engine -> Agent workflows +``` + +The semantic-layer box must be visually prominent and list: + +```text +join graph +grain +measures +relationships +safe query planning +``` + +- [x] **Step 3: Add the join graph section** + +Add `## The join graph` with: + +- one short paragraph defining nodes and edges; +- bullets for why the graph matters; +- an inline diagram using `orders`, `customers`, `order_items`, and `refunds`. + +The section must include this claim in plain language: + +```text +The graph lets KTX choose valid paths, reject unsafe paths, and reason about +whether a join preserves or multiplies rows before SQL is generated. +``` + +- [x] **Step 4: Add build and maintenance sections** + +Add `## How KTX builds it` and `## How KTX maintains it`. + +`How KTX builds it` must cover these inputs: + +```text +declared primary keys +declared foreign keys +inferred relationships +dbt, MetricFlow, and LookML imports +query history +analyst review +``` + +`How KTX maintains it` must show this loop: + +```text +ingest evidence -> YAML diff -> validation -> analyst review -> agent use -> corrections +``` + +- [x] **Step 5: Add the fan-out and safe execution sections** + +Add `## Why grain and relationships matter` with a fan-out example comparing +orders joined to order items. 
Include a compact table with columns: + +```text +Problem +What happens +How KTX avoids it +``` + +Add `## How the execution engine uses the graph` with a before/after table: + +```text +Naive SQL shape +Semantic-layer SQL shape +``` + +The safe path must mention: + +```text +pre-aggregates fact measures at their own grain before joining dimensions +``` + +- [x] **Step 6: Add agent outcome links** + +Add a closing `## What this means for agents` section with bullets explaining +that agents can: + +```text +search semantic sources +compile SQL through ktx sl query +validate changes before review +patch YAML and Markdown files in git +explain provenance and metric meaning +``` + +End with links to: + +```mdx +[Writing Context](/docs/guides/writing-context) +[ktx sl](/docs/cli-reference/ktx-sl) +``` + +### Task 3: Add the Cross-Link from The Context Layer + +**Files:** +- Modify: `docs-site/content/docs/concepts/the-context-layer.mdx` + +- [x] **Step 1: Replace the semantic sources paragraph with a scannable block** + +Find the `**Semantic sources**` paragraph under `KTX organizes context into four pillars`. +Replace the long paragraph with: + +```mdx +**Semantic sources** are YAML definitions that describe your data in terms +agents can reason about: + +- source tables or SQL queries; +- row grain; +- typed columns; +- valid joins; +- named measures, filters, and segments. + +This is where "revenue means `sum(amount)` excluding refunds" lives. For the +join graph, fan-out protections, and execution mechanics, read +[Semantic Layer Internals](/docs/concepts/semantic-layer-internals). +``` + +- [x] **Step 2: Confirm the page still owns the product positioning** + +Search the edited file: + +```bash +rg -n "context layer|Semantic Layer Internals|semantic layer - that's a critical component" docs-site/content/docs/concepts/the-context-layer.mdx +``` + +Expected: output includes the existing context-layer framing and the new internals link. 
+ +### Task 4: Fix Mobile Docs Header Overflow + +**Files:** +- Modify: `docs-site/app/docs/[[...slug]]/page.tsx` + +- [x] **Step 1: Stack title actions on narrow screens** + +Replace the non-hero page header wrapper: + +```tsx +
    +``` + +with: + +```tsx +
    +``` + +This keeps desktop layout unchanged while preventing the action buttons from +forcing horizontal overflow on mobile. + +- [x] **Step 2: Allow the docs article to shrink in the layout grid** + +Update the `DocsPage` and `DocsBody` wrappers: + +```tsx + +``` + +```tsx + +``` + +This prevents tables, code blocks, and custom diagrams from forcing the +Fumadocs main article column wider than the mobile viewport, overrides the +library's built-in max-width rule on mobile, aligns the article to the left on +mobile, and preserves the normal centered desktop max width. + +If long words still clip under mobile viewport capture, add the same wrapping +behavior used by the Fumadocs sidebar: + +```tsx + + {page.data.description} + +``` + +```tsx + +``` + +- [x] **Step 3: Recheck mobile render** + +Capture or inspect a 390px-wide render of: + +```text +http://127.0.0.1:3000/docs/concepts/semantic-layer-internals +``` + +Expected: the title, description, action buttons, and positioning block stay +within the viewport. + +### Task 5: Verify Docs Content and Build + +**Files:** +- Check: `docs-site/content/docs/concepts/semantic-layer-internals.mdx` +- Check: `docs-site/content/docs/concepts/the-context-layer.mdx` +- Check: `docs-site/content/docs/concepts/meta.json` +- Check: `docs-site/app/docs/[[...slug]]/page.tsx` + +- [x] **Step 1: Run content checks** + +Run: + +```bash +rg -n "KTX is a context layer|markdown saver|fan-out|join graph|pre-aggregates|Semantic Layer Internals" docs-site/content/docs/concepts +``` + +Expected: matches appear in the new page and the cross-link appears in +`the-context-layer.mdx`. + +- [x] **Step 2: Build the docs site** + +Run: + +```bash +pnpm --filter ktx-docs build +``` + +Expected: build exits 0. + +- [x] **Step 3: Preview locally** + +Run: + +```bash +pnpm --filter ktx-docs dev +``` + +Open: + +```text +http://localhost:3000/docs/concepts/semantic-layer-internals +``` + +Inspect desktop and mobile widths. 
The opening should clearly position KTX as a +context layer, the Concepts navigation should list the new page, and diagrams +should not overlap or produce unreadable text. + +- [x] **Step 4: Commit implementation** + +Run: + +```bash +git status --short +git add docs-site/content/docs/concepts/meta.json docs-site/content/docs/concepts/semantic-layer-internals.mdx docs-site/content/docs/concepts/the-context-layer.mdx docs-site/app/docs/[[...slug]]/page.tsx docs/superpowers/plans/2026-05-15-semantic-layer-docs.md +git commit -m "docs: add semantic layer internals concept" +``` diff --git a/docs/superpowers/specs/2026-05-15-semantic-layer-docs-design.md b/docs/superpowers/specs/2026-05-15-semantic-layer-docs-design.md new file mode 100644 index 00000000..34d7594c --- /dev/null +++ b/docs/superpowers/specs/2026-05-15-semantic-layer-docs-design.md @@ -0,0 +1,166 @@ +# Semantic Layer Docs Design + +**Date:** 2026-05-15 +**Status:** Design - pending implementation plan + +## Goal + +Add a concise Concepts page that explains the semantic layer as the query +planning engine inside KTX's broader context layer. + +The page should make the technical depth visible to skeptical data users +without positioning KTX as only a semantic-layer product. Success means a reader +understands: + +- KTX is a context layer for agents. +- The semantic layer is one core subsystem inside that context layer. +- The join graph, grain declarations, and relationship metadata are what make + generated SQL safer than schema-only or markdown-only approaches. +- KTX maintains this semantic layer through ingest, validation, analyst edits, + and reviewable files. + +## Current State + +The docs currently explain semantic sources in two places: + +- `docs-site/content/docs/concepts/the-context-layer.mdx` describes semantic + sources as one pillar of KTX context. 
+- `docs-site/content/docs/guides/writing-context.mdx` documents the YAML fields + for sources, measures, joins, grain, validation, and common errors. + +That content is useful, but the differentiator is not visually obvious. The +semantic layer is embedded in longer narrative pages, so readers can miss the +hard parts: join graph construction, fan-out prevention, chasm traps, and query +planning. + +## Positioning + +Create a standalone Concepts page with a guarded title such as +`Semantic Layer Internals` or `The Semantic Engine Inside KTX`. + +The first screen must frame the product clearly: + +> KTX is a context layer. Its semantic layer is the query-planning core that +> turns reviewed context into safe SQL. + +The page should avoid a title like `Semantic Layer` by itself because that can +make KTX look like a narrow semantic-layer tool. The page should repeatedly show +the semantic layer between the broader context inputs and the agent workflows it +supports. + +Add a short cross-link from `the-context-layer.mdx` so the existing overview +keeps owning the product category. That section should say the semantic layer is +one critical pillar, then link to the internals page for readers who want the +mechanics. + +## Page Structure + +Add `docs-site/content/docs/concepts/semantic-layer-internals.mdx` and include +it in `docs-site/content/docs/concepts/meta.json` after `the-context-layer`. + +Recommended sections: + +1. `What this page explains` + - One short paragraph. + - A two-column `KTX is / KTX is not just` table. + +2. `Where the semantic layer fits` + - A visual block showing: + `context inputs -> semantic layer engine -> agent workflows`. + - Inputs include semantic YAML, wiki pages, scans, and provenance. + - Outputs include search, SQL generation, explanations, edits, and review. + +3. `The join graph` + - Explain nodes as semantic sources and edges as validated joins. + - Show a small graph with `orders`, `customers`, `order_items`, and + `refunds`. 
+ - Keep text to one or two short paragraphs plus bullets. + +4. `How KTX builds it` + - Show a pipeline from database evidence and imported modeling tools to + reviewable YAML. + - Mention declared keys, inferred relationships, dbt/MetricFlow/LookML + imports, query history, validation, and analyst review. + +5. `How KTX maintains it` + - Show a feedback loop: + ingest evidence -> YAML diff -> validation -> analyst review -> agent use + -> corrections. + - Emphasize that files remain the source of truth. + +6. `Why grain and relationships matter` + - Use the fan-out problem as the central example. + - Compare a naive join against a safe semantic-layer plan. + - Explain many-to-one, one-to-many, many-to-many, chasm traps, and ambiguous + paths in compact bullets. + +7. `How the execution engine uses the graph` + - Explain path selection, unsafe path rejection, pre-aggregation into CTEs, + filter placement, and dialect transpilation. + - Include a small before/after SQL-shape diagram or table. + +8. `What this means for agents` + - Summarize why this is more than saving markdown: + agents can inspect, query, validate, edit, and review the same semantic + files. + - Link to `Writing Context` and `ktx sl`. + +## Scannability Rules + +The implementation should shorten long prose blocks across the touched pages. + +- Keep most text blocks to one or two paragraphs. +- Prefer bullets, tables, diagrams, and compact callout blocks between prose. +- Avoid four-paragraph narrative runs. +- Use diagrams before dense explanations when the concept is spatial. +- Keep examples concrete and copy-pasteable. + +## Visual Direction + +Use the existing docs-site MDX style rather than a new design system. The current +`the-context-layer.mdx` page already uses custom `not-prose` MDX diagrams with +Fumadocs color tokens; the new page should follow that pattern. 
+ +The diagrams should feel like technical product documentation: + +- restrained, dense, and readable; +- high contrast for the semantic-layer engine box; +- visible arrows or adjacency that make flow obvious; +- tables for classification and comparison; +- no marketing hero, decorative gradients, or generic card-heavy layout. + +## Non-goals + +- Do not redesign the whole docs site. +- Do not rename KTX concepts, packages, commands, or directories. +- Do not claim KTX replaces every BI or semantic-layer system. +- Do not add implementation details that are not true in the current codebase. +- Do not expand the page into a long reference for every YAML field; keep that + in `Writing Context`. + +## Verification + +Because this is docs-only work, verification should focus on the docs site: + +- Run the docs build or the narrowest available docs-site type/build check. +- Run formatting or lint checks if the docs package exposes them. +- Preview the page locally and inspect desktop and mobile widths. +- Confirm the page is listed in Concepts navigation. +- Confirm the opening section clearly says KTX is a context layer, not just a + semantic-layer tool. + +If implementation changes only MDX and metadata, TypeScript workspace tests are +not required unless the page introduces shared components. + +## Acceptance Criteria + +- A standalone Concepts page explains the semantic-layer internals. +- The Context Layer page links to the new internals page without making the + overview longer. +- The new page includes diagrams for the system fit, join graph, maintenance + loop, and fan-out-safe execution path. +- Long prose is broken into scannable sections with bullets, tables, and visual + interruptions. +- The positioning consistently says KTX is a context layer with a semantic + execution core. +- Docs-site verification passes or any skipped check is reported with a reason. 
diff --git a/packages/cli/src/setup-agents.test.ts b/packages/cli/src/setup-agents.test.ts index 9fb6903a..cccf1474 100644 --- a/packages/cli/src/setup-agents.test.ts +++ b/packages/cli/src/setup-agents.test.ts @@ -91,6 +91,9 @@ describe('setup agents', () => { expect(skill).toContain('must not print secrets'); expect(skill).toContain('status --json'); expect(skill).toContain('sl list --json'); + expect(skill).toContain('sl query'); + expect(skill).toContain('--format json'); + expect(skill).not.toContain('sl query --json'); expect(skill).not.toContain('agent '); expect(skill).not.toContain('sql execute'); expect(await readKtxAgentInstallManifest(tempDir)).toMatchObject({ @@ -150,6 +153,8 @@ describe('setup agents', () => { expect(skill).not.toContain('`ktx agent'); expect(skill).toContain('status --json'); expect(skill).toContain('sl query'); + expect(skill).toContain('--format json'); + expect(skill).not.toContain('sl query --json'); expect(skill).not.toContain('sql execute'); }); diff --git a/packages/cli/src/setup-agents.ts b/packages/cli/src/setup-agents.ts index a065fc41..ae7e91dc 100644 --- a/packages/cli/src/setup-agents.ts +++ b/packages/cli/src/setup-agents.ts @@ -310,7 +310,8 @@ function ktxCommandLine(launcher: KtxCliLauncher, args: string[]): string { } function cliInstructionContent(input: { projectDir: string; launcher: KtxCliLauncher }): string { - const projectDirArgs = ['--json', '--project-dir', input.projectDir]; + const projectDirArgs = ['--project-dir', input.projectDir]; + const jsonProjectDirArgs = ['--json', ...projectDirArgs]; return [ '---', 'name: ktx', @@ -327,9 +328,9 @@ function cliInstructionContent(input: { projectDir: string; launcher: KtxCliLaun '', 'Available commands:', '', - `- \`${ktxCommandLine(input.launcher, ['status', ...projectDirArgs])}\``, - `- \`${ktxCommandLine(input.launcher, ['sl', 'list', ...projectDirArgs])}\``, - `- \`${ktxCommandLine(input.launcher, ['sl', 'search', '', ...projectDirArgs, '--connection-id', 
''])}\``, + `- \`${ktxCommandLine(input.launcher, ['status', ...jsonProjectDirArgs])}\``, + `- \`${ktxCommandLine(input.launcher, ['sl', 'list', ...jsonProjectDirArgs])}\``, + `- \`${ktxCommandLine(input.launcher, ['sl', 'search', '', ...jsonProjectDirArgs, '--connection-id', ''])}\``, `- \`${ktxCommandLine(input.launcher, [ 'sl', 'query', @@ -338,11 +339,13 @@ function cliInstructionContent(input: { projectDir: string; launcher: KtxCliLaun '', '--query-file', '', + '--format', + 'json', '--execute', '--max-rows', '100', ])}\``, - `- \`${ktxCommandLine(input.launcher, ['wiki', 'search', '', ...projectDirArgs, '--limit', '10'])}\``, + `- \`${ktxCommandLine(input.launcher, ['wiki', 'search', '', ...jsonProjectDirArgs, '--limit', '10'])}\``, '', 'Use semantic-layer queries before direct database access. Do not print secrets or credential references.', '',