ktx/docs-site/lib/llm-docs.ts

import { source } from "@/lib/source";
import { readDocsPageMarkdown } from "@/lib/docs-markdown";

/**
 * Base origin (including the `/ktx` path prefix) that `absoluteUrl` prepends to
 * site-relative paths; also printed on the `Source:` line of the full-docs export.
 */
const siteOrigin = "https://docs.kaelio.com/ktx";

/**
 * A docs page in the shape used by this module to build the LLM-facing text
 * outputs (the `llms.txt` index, the full-docs export, and per-page Markdown).
 */
export type LlmDocsPage = {
  /** Page title; rendered as the `# ` heading and as link text in the page index. */
  title: string;
  /** Optional summary; rendered as a blockquote under the heading and after index links. */
  description?: string;
  /** Site-relative page path; `absoluteUrl` prefixes the site origin when it is printed. */
  url: string;
  /** Site-relative path of the Markdown rendition (`toLlmDocsPage` sets it to `url` + `.md`). */
  markdownUrl: string;
  /** Slug segments; the first segment is used as the category in the page index. */
  slug: string[];
  /** Lazily loads the page body as Markdown. */
  getMarkdown: () => Promise<string>;
};

/**
 * Lists every page returned by the docs `source`, converted to `LlmDocsPage`.
 *
 * @returns One `LlmDocsPage` per source page, in the order `source.getPages()` yields them.
 */
export function getLlmDocsPages(): LlmDocsPage[] {
  const sourcePages = source.getPages();
  return sourcePages.map((sourcePage) => toLlmDocsPage(sourcePage));
}

/**
 * Looks up a single docs page by its slug segments.
 *
 * @param slug - Slug segments passed straight through to `source.getPage`.
 * @returns The page converted to `LlmDocsPage`, or `null` when the source has no match.
 */
export function getLlmDocsPage(slug: string[] | undefined) {
  const match = source.getPage(slug);
  if (!match) {
    return null;
  }

  return toLlmDocsPage(match);
}

/**
 * Renders one page as a standalone Markdown document: a title heading, an
 * optional blockquoted description, the canonical and Markdown URLs, and then
 * the page body.
 *
 * @param page - The page to render; its body is loaded via `page.getMarkdown()`.
 * @returns The assembled document after it has been passed through `normalizeMarkdown`.
 */
export async function getPageMarkdown(page: LlmDocsPage) {
  let summary = "";
  if (page.description) {
    summary = `\n\n> ${page.description}`;
  }

  const body = await page.getMarkdown();

  // An empty string between entries yields a blank line; the trailing empty
  // entry yields the final newline.
  const rendered = [
    `# ${page.title}${summary}`,
    "",
    `Canonical URL: ${absoluteUrl(page.url)}`,
    `Markdown URL: ${absoluteUrl(page.markdownUrl)}`,
    "",
    body,
    "",
  ].join("\n");

  return normalizeMarkdown(rendered);
}

/**
 * Builds the `llms.txt` index: a short product summary, curated link sections,
 * and a generated index of every docs page grouped by category.
 *
 * Curated links take their description and Markdown URL from the matching docs
 * page when one exists; otherwise the supplied fallback description and
 * `<url>.md` are used.
 *
 * @returns The full `llms.txt` body, ending with a newline.
 */
export function buildLlmsTxt() {
  const pages = getLlmDocsPages();

  // Index pages by site-relative URL so curated links can reuse page metadata.
  const pagesByUrl = new Map<string, LlmDocsPage>();
  for (const page of pages) {
    pagesByUrl.set(page.url, page);
  }

  function entry(url: string, label: string, fallbackDescription: string) {
    const known = pagesByUrl.get(url);
    const summary = known?.description ?? fallbackDescription;
    const target = known?.markdownUrl ?? `${url}.md`;
    return `- [${label}](${absoluteUrl(target)}): ${summary}`;
  }

  // One array entry per output line; "" is a blank line and the final ""
  // produces the trailing newline.
  const lines = [
    "# KTX",
    "",
    "> Agent-native context layer for analytics engineering and database agents.",
    "",
    "KTX provides semantic-layer files, warehouse scans, wiki pages, provenance, and agent-facing tools that help coding agents answer analytics questions without inventing metrics or joins.",
    "",
    "## Agent Entry Points",
    "",
    entry("/docs/ai-resources", "AI Resources", "Machine-readable docs, prompt recipes, and agent setup paths"),
    entry("/docs/ai-resources/agent-quickstart", "Agent Quickstart", "Task-first route for coding assistants using KTX"),
    entry("/docs/ai-resources/markdown-access", "Markdown Access", "Fetch KTX docs as llms.txt, llms-full.txt, or per-page Markdown"),
    entry("/docs/ai-resources/agent-instructions", "Agent Instructions", "Suggested instructions for coding assistants that need to read and cite KTX docs"),
    "",
    "## Start Here",
    "",
    entry("/docs/getting-started/introduction", "Introduction", "What KTX is and who it is for"),
    entry("/docs/getting-started/quickstart", "Quickstart", "Set up KTX and build your first context"),
    entry("/docs/guides/writing-context", "Writing Context", "Write semantic sources and wiki pages"),
    "",
    "## Machine-Readable Documentation",
    "",
    `- [Full documentation](${absoluteUrl("/llms-full.txt")}): All docs pages in one plain-text markdown response`,
    `- [Markdown access guide](${absoluteUrl("/docs/ai-resources/markdown-access.md")}): How to fetch llms.txt, llms-full.txt, and per-page Markdown`,
    `- [Quickstart markdown](${absoluteUrl("/docs/getting-started/quickstart.md")}): Human setup walkthrough`,
    `- [Semantic-layer CLI markdown](${absoluteUrl("/docs/cli-reference/ktx-sl.md")}): Semantic-layer commands and JSON output`,
    `- [Wiki CLI markdown](${absoluteUrl("/docs/cli-reference/ktx-wiki.md")}): Wiki page commands and JSON output`,
    "",
    "## CLI Reference",
    "",
    entry("/docs/cli-reference/ktx-setup", "ktx setup", "Interactive project setup"),
    entry("/docs/cli-reference/ktx-sl", "ktx sl", "Semantic-layer commands"),
    entry("/docs/cli-reference/ktx-wiki", "ktx wiki", "Wiki page commands"),
    entry("/docs/cli-reference/ktx-connection", "ktx connection", "Connection management commands"),
    "",
    "## Integrations",
    "",
    entry("/docs/integrations/primary-sources", "Primary Sources", "Connect KTX to databases and warehouses"),
    entry("/docs/integrations/context-sources", "Context Sources", "Ingest dbt, LookML, Metabase, Looker, MetricFlow, and Notion"),
    "",
    "## All Documentation",
    "",
    buildPageIndex(pages),
    "",
  ];

  return lines.join("\n");
}

/**
 * Builds the full-docs export: a header, a `Source:` line with the site origin,
 * and every page rendered by `getPageMarkdown`, separated by `---` rules.
 *
 * @returns The concatenated document; pages are rendered concurrently and kept in listing order.
 */
export async function buildLlmsFullTxt() {
  const pages = getLlmDocsPages();
  const pageDocuments = await Promise.all(pages.map((page) => getPageMarkdown(page)));

  const sections = ["# KTX Full Documentation", `Source: ${siteOrigin}`].concat(pageDocuments);
  return sections.join("\n\n---\n\n");
}

/**
 * Converts a page from the docs `source` into the `LlmDocsPage` shape.
 *
 * The Markdown URL is the page URL with `.md` appended. The body is not read
 * here: `getMarkdown` loads it through `readDocsPageMarkdown` on demand and
 * passes it through `normalizeMarkdown`.
 *
 * @param page - A page as returned by `source.getPages()`.
 */
function toLlmDocsPage(page: ReturnType<typeof source.getPages>[number]) {
  const llmPage = {
    title: page.data.title,
    description: page.data.description,
    url: page.url,
    markdownUrl: `${page.url}.md`,
    slug: page.slugs,
    getMarkdown: async () => {
      const raw = await readDocsPageMarkdown(page.slugs);
      return normalizeMarkdown(raw);
    },
  } satisfies LlmDocsPage;

  return llmPage;
}

/**
 * Normalizes Markdown for the text outputs: converts CRLF/CR line endings to
 * LF, strips a leading `---`-fenced front-matter block, trims surrounding
 * whitespace, and collapses runs of three or more newlines into one blank line.
 *
 * The front-matter block is only removed when the text starts with a `---`
 * line, and it ends at the first following line that consists solely of `---`
 * (optionally followed by spaces or tabs). An empty block (`---` directly
 * followed by `---`) is removed as well.
 *
 * NOTE: the blank-line collapse is applied to the whole string, so it also
 * affects blank-line runs inside fenced code blocks.
 *
 * @param markdown - Raw Markdown, with or without front matter.
 * @returns The normalized Markdown with LF line endings and no leading or trailing whitespace.
 */
function normalizeMarkdown(markdown: string) {
  // The optional body group is lazy (`??`) so an empty front-matter block ends
  // at the very next `---` line instead of running on to a later horizontal rule.
  const frontMatter = /^---[ \t]*\n(?:[\s\S]*?\n)??---[ \t]*(?:\n|$)/;

  return markdown
    // Line endings are unified first so the patterns below only deal with "\n".
    .replace(/\r\n?/g, "\n")
    .trim()
    .replace(frontMatter, "")
    .trim()
    .replace(/\n{3,}/g, "\n\n");
}

/**
 * Renders the "all pages" index: pages are grouped by their first slug segment
 * (`general` when a page has no slug segments), and each group becomes a `###`
 * heading followed by one Markdown link per page.
 *
 * Groups appear in the order their category is first seen, and pages keep
 * their input order within a group.
 *
 * @param pages - Pages to list.
 * @returns The index sections joined by blank lines; an empty string for no pages.
 */
function buildPageIndex(pages: LlmDocsPage[]) {
  const pagesByCategory = new Map<string, LlmDocsPage[]>();

  for (const page of pages) {
    const key = page.slug[0] ?? "general";
    const bucket = pagesByCategory.get(key);
    if (bucket) {
      bucket.push(page);
    } else {
      pagesByCategory.set(key, [page]);
    }
  }

  const sections: string[] = [];
  for (const [category, categoryPages] of pagesByCategory) {
    const items = categoryPages.map((page) => {
      const suffix = page.description ? `: ${page.description}` : "";
      return `- [${page.title}](${absoluteUrl(page.markdownUrl)})${suffix}`;
    });

    sections.push(`### ${formatCategoryName(category)}\n\n${items.join("\n")}`);
  }

  return sections.join("\n\n");
}

/**
 * Turns a site-relative path into an absolute URL by prefixing `siteOrigin`.
 *
 * @param path - Path to append verbatim; no separator is inserted or removed.
 */
function absoluteUrl(path: string) {
  return siteOrigin.concat(path);
}

/**
 * Converts a category slug into a display heading.
 *
 * Known categories use a fixed label (for example `ai-resources` becomes
 * `AI Resources`); any other slug is split on `-` and each word gets an
 * upper-cased first letter.
 *
 * @param category - Category slug, e.g. `getting-started`.
 * @returns The display label for the category.
 */
function formatCategoryName(category: string) {
  // A Map is used rather than a plain object so slugs such as `constructor` or
  // `toString` cannot resolve to inherited Object.prototype members.
  const labels = new Map<string, string>([
    ["ai-resources", "AI Resources"],
    ["cli-reference", "CLI Reference"],
  ]);

  const label = labels.get(category);
  if (label) {
    return label;
  }

  return category
    .split("-")
    .map((word) => word.charAt(0).toUpperCase() + word.slice(1))
    .join(" ");
}
feat(docs-site): add agent-readable docs routes 2026-05-11 16:40:34 -07:00			`import { source } from "@/lib/source";`
feat: merge ingest and scan * docs: add CLI component reuse guidance * docs: add unified ingest ux design * Refine unified ingest UX design after adversarial review iteration 1 * Refine unified ingest UX design after adversarial review iteration 2 * Refine unified ingest UX design after adversarial review iteration 3 * feat(cli): route public connection ingest command * feat(cli): hide standalone scan from public help * feat(cli): plan public ingest depth and query history * feat(cli): execute public database ingest facets * feat(ingest): read connection query history config * fix(cli): use public ingest wording * fix(config): stop generating ingest adapter allow lists * docs: document public ingest command * test: align ingest surface expectations * docs: add unified ingest public CLI surface plan * feat(cli): preflight deep public ingest readiness * feat(setup): store query history in connection context * feat(setup): store database context depth * feat(setup): verify context readiness by database depth * fix(setup): keep context build foreground only * fix(config): reject reserved ingest connection ids * test: close unified ingest v1 expectations * docs: add unified ingest v1 closure plan * fix(ingest): bypass adapter allow-list for public source ingest * fix(ingest): honor query history window intent * fix(ingest): hide scan internals from public database ingest * feat(ingest): use foreground view for interactive public ingest * fix(setup): use schema context and query history wording * test(cli): verify unified ingest public output * docs: add unified ingest v1 public output closure plan * fix(setup): forward query history flags * fix(setup): prompt for postgres query history * fix(status): report query history readiness * fix(ingest): remove legacy public guidance * fix(ingest): polish foreground retry copy * docs(examples): use unified query history wording * chore(ingest): finish public query history cleanup * docs: add unified ingest v1 query history 
status cleanup plan * test(docs): cover unified ingest public docs * docs: align ingest CLI reference with unified UX * docs: update context build guides for unified ingest * docs: update setup and primary source ingest wording * docs: stop advertising adapter-backed example ingest * docs: close unified ingest public docs gaps * docs: add unified ingest v1 docs site closure plan * fix: render unified ingest foreground warnings * fix: explain query history schema order * fix: add public ingest retry guidance * fix: align setup next steps with unified ingest * fix: remove scan wording from demo progress * test: verify unified ingest ux closure * docs: add unified ingest v1 foreground and retry closure plan * fix(cli): preserve query-history pull config in public ingest * fix(cli): omit hidden commands from docs command tree * test(cli): close unified ingest final public surface checks * docs: add unified ingest v1 final public surface closure plan * fix(cli): use public source labels in ingest reports * fix(cli): suppress low-level public ingest output * test(cli): verify unified ingest public plain output * docs: add unified ingest v1 public plain output closure plan * fix(cli): add public ingest copy sanitizers * fix(cli): sanitize public ingest progress copy * fix(cli): rename setup schema scope prompt * docs(plan): add progress copy closure; test: align setup back-nav fixture Adds the iter9 plan and updates the setup back-navigation test fixture to pass disableQueryHistory plus listSchemas/listTables stubs that the unified ingest setup step now requires. 
* docs(plan): add final ux labels plan with narrowed label scans * fix(cli): aggregate unsupported query-history warnings * fix(cli): align setup database labels * test(cli): fix setup database test type-check * fix(cli): remove primary-source wording from setup output * test(cli): verify unified ingest setup closure * docs(plan): add unified ingest v1 verification copy closure plan * fix(cli): remove top-level scan command * fix(cli): remove legacy ingest and wiki commands * Merge scan into ingest flow * feat(cli): split ingest progress into per-phase rows, rename work units to tasks Each database target in the unified ingest dashboard now renders one row per real subprocess (Schema, then Query history when enabled) instead of a single combined bar. Each phase has its own monotonic 0-100% bar so the progress never snaps back to zero when historic-sql starts after scan completes. Completed phases keep their final bar, summary, and elapsed time visible as an inline audit trail; queued and skipped phases are shown explicitly. Also rename user-facing "work units" / "Failed work units" to "tasks" / "Failed tasks" in ingest output and parseIngestSummary. The parser still accepts the legacy "Work units:" wording in captured output for backward compat. Internal memory-flow event names and type fields are left alone. * Fix test harness failures * Fix CI smoke checks --------- Co-authored-by: Andrey Avtomonov <7889985+andreybavt@users.noreply.github.com> 2026-05-14 01:43:06 +02:00			`import { readDocsPageMarkdown } from "@/lib/docs-markdown";`
feat(docs-site): add agent-readable docs routes 2026-05-11 16:40:34 -07:00
docs(docs-site): update AI resource URLs and nav order 2026-05-11 20:03:53 -07:00			`const siteOrigin = "https://docs.kaelio.com/ktx";`
feat(docs-site): add agent-readable docs routes 2026-05-11 16:40:34 -07:00
			`export type LlmDocsPage = {`
			`title: string;`
			`description?: string;`
			`url: string;`
			`markdownUrl: string;`
			`slug: string[];`
			`getMarkdown: () => Promise<string>;`
			`};`

			`export function getLlmDocsPages(): LlmDocsPage[] {`
			`return source.getPages().map(toLlmDocsPage);`
			`}`

			`export function getLlmDocsPage(slug: string[] \| undefined) {`
			`const page = source.getPage(slug);`
			`return page ? toLlmDocsPage(page) : null;`
			`}`

			`export async function getPageMarkdown(page: LlmDocsPage) {`
			const description = page.description ? `\n\n> ${page.description}` : "";
			`const body = await page.getMarkdown();`

			return normalizeMarkdown(`# ${page.title}${description}

docs(docs-site): update AI resource URLs and nav order 2026-05-11 20:03:53 -07:00			`Canonical URL: ${absoluteUrl(page.url)}`
			`Markdown URL: ${absoluteUrl(page.markdownUrl)}`
feat(docs-site): add agent-readable docs routes 2026-05-11 16:40:34 -07:00
			`${body}`
			`);
			`}`

			`export function buildLlmsTxt() {`
			`const pages = getLlmDocsPages();`
			`const byUrl = new Map(pages.map((page) => [page.url, page]));`
			`const link = (url: string, label: string, fallbackDescription: string) => {`
			`const page = byUrl.get(url);`
			`const description = page?.description ?? fallbackDescription;`
docs(docs-site): add AI resources for agents 2026-05-11 17:20:11 -07:00			const markdownUrl = page?.markdownUrl ?? `${url}.md`;
docs(docs-site): update AI resource URLs and nav order 2026-05-11 20:03:53 -07:00			return `- [${label}](${absoluteUrl(markdownUrl)}): ${description}`;
feat(docs-site): add agent-readable docs routes 2026-05-11 16:40:34 -07:00			`};`

			return `# KTX

			`> Agent-native context layer for analytics engineering and database agents.`

feat: rename project wiki directory (#66) * feat: rename project wiki directory * test: fix wiki skill ordering expectations * Show configured context sources in setup 2026-05-13 16:05:58 +02:00			`KTX provides semantic-layer files, warehouse scans, wiki pages, provenance, and agent-facing tools that help coding agents answer analytics questions without inventing metrics or joins.`
feat(docs-site): add agent-readable docs routes 2026-05-11 16:40:34 -07:00
docs(docs-site): add AI resources for agents 2026-05-11 17:20:11 -07:00			`## Agent Entry Points`

			`${link("/docs/ai-resources", "AI Resources", "Machine-readable docs, prompt recipes, and agent setup paths")}`
			`${link("/docs/ai-resources/agent-quickstart", "Agent Quickstart", "Task-first route for coding assistants using KTX")}`
docs(docs-site): separate docs-agent resources 2026-05-11 19:35:46 -07:00			`${link("/docs/ai-resources/markdown-access", "Markdown Access", "Fetch KTX docs as llms.txt, llms-full.txt, or per-page Markdown")}`
			`${link("/docs/ai-resources/agent-instructions", "Agent Instructions", "Suggested instructions for coding assistants that need to read and cite KTX docs")}`
docs(docs-site): add AI resources for agents 2026-05-11 17:20:11 -07:00
feat(docs-site): add agent-readable docs routes 2026-05-11 16:40:34 -07:00			`## Start Here`

			`${link("/docs/getting-started/introduction", "Introduction", "What KTX is and who it is for")}`
			`${link("/docs/getting-started/quickstart", "Quickstart", "Set up KTX and build your first context")}`
feat: rename project wiki directory (#66) * feat: rename project wiki directory * test: fix wiki skill ordering expectations * Show configured context sources in setup 2026-05-13 16:05:58 +02:00			`${link("/docs/guides/writing-context", "Writing Context", "Write semantic sources and wiki pages")}`
feat(docs-site): add agent-readable docs routes 2026-05-11 16:40:34 -07:00
			`## Machine-Readable Documentation`

docs(docs-site): update AI resource URLs and nav order 2026-05-11 20:03:53 -07:00			`- [Full documentation](${absoluteUrl("/llms-full.txt")}): All docs pages in one plain-text markdown response`
			`- [Markdown access guide](${absoluteUrl("/docs/ai-resources/markdown-access.md")}): How to fetch llms.txt, llms-full.txt, and per-page Markdown`
			`- [Quickstart markdown](${absoluteUrl("/docs/getting-started/quickstart.md")}): Human setup walkthrough`
feat(cli)!: remove ktx agent command (#58) * feat(cli)!: remove ktx agent command * test(context): update PGlite boundary guardrail 2026-05-13 13:01:56 +02:00			`- [Semantic-layer CLI markdown](${absoluteUrl("/docs/cli-reference/ktx-sl.md")}): Semantic-layer commands and JSON output`
feat: rename project wiki directory (#66) * feat: rename project wiki directory * test: fix wiki skill ordering expectations * Show configured context sources in setup 2026-05-13 16:05:58 +02:00			`- [Wiki CLI markdown](${absoluteUrl("/docs/cli-reference/ktx-wiki.md")}): Wiki page commands and JSON output`
docs(docs-site): add AI resources for agents 2026-05-11 17:20:11 -07:00
feat(docs-site): add agent-readable docs routes 2026-05-11 16:40:34 -07:00			`## CLI Reference`

			`${link("/docs/cli-reference/ktx-setup", "ktx setup", "Interactive project setup")}`
			`${link("/docs/cli-reference/ktx-sl", "ktx sl", "Semantic-layer commands")}`
feat: rename project wiki directory (#66) * feat: rename project wiki directory * test: fix wiki skill ordering expectations * Show configured context sources in setup 2026-05-13 16:05:58 +02:00			`${link("/docs/cli-reference/ktx-wiki", "ktx wiki", "Wiki page commands")}`
feat(docs-site): add agent-readable docs routes 2026-05-11 16:40:34 -07:00			`${link("/docs/cli-reference/ktx-connection", "ktx connection", "Connection management commands")}`

			`## Integrations`

			`${link("/docs/integrations/primary-sources", "Primary Sources", "Connect KTX to databases and warehouses")}`
			`${link("/docs/integrations/context-sources", "Context Sources", "Ingest dbt, LookML, Metabase, Looker, MetricFlow, and Notion")}`
docs(docs-site): add AI resources for agents 2026-05-11 17:20:11 -07:00
			`## All Documentation`

			`${buildPageIndex(pages)}`
feat(docs-site): add agent-readable docs routes 2026-05-11 16:40:34 -07:00			`;
			`}`

			`export async function buildLlmsFullTxt() {`
			`const rendered = await Promise.all(getLlmDocsPages().map(getPageMarkdown));`
			return [`# KTX Full Documentation`, `Source: ${siteOrigin}`, ...rendered].join(
			`"\n\n---\n\n",`
			`);`
			`}`

			`function toLlmDocsPage(page: ReturnType<typeof source.getPages>[number]) {`
			`return {`
			`title: page.data.title,`
			`description: page.data.description,`
			`url: page.url,`
			markdownUrl: `${page.url}.md`,
			`slug: page.slugs,`
feat: merge ingest and scan * docs: add CLI component reuse guidance * docs: add unified ingest ux design * Refine unified ingest UX design after adversarial review iteration 1 * Refine unified ingest UX design after adversarial review iteration 2 * Refine unified ingest UX design after adversarial review iteration 3 * feat(cli): route public connection ingest command * feat(cli): hide standalone scan from public help * feat(cli): plan public ingest depth and query history * feat(cli): execute public database ingest facets * feat(ingest): read connection query history config * fix(cli): use public ingest wording * fix(config): stop generating ingest adapter allow lists * docs: document public ingest command * test: align ingest surface expectations * docs: add unified ingest public CLI surface plan * feat(cli): preflight deep public ingest readiness * feat(setup): store query history in connection context * feat(setup): store database context depth * feat(setup): verify context readiness by database depth * fix(setup): keep context build foreground only * fix(config): reject reserved ingest connection ids * test: close unified ingest v1 expectations * docs: add unified ingest v1 closure plan * fix(ingest): bypass adapter allow-list for public source ingest * fix(ingest): honor query history window intent * fix(ingest): hide scan internals from public database ingest * feat(ingest): use foreground view for interactive public ingest * fix(setup): use schema context and query history wording * test(cli): verify unified ingest public output * docs: add unified ingest v1 public output closure plan * fix(setup): forward query history flags * fix(setup): prompt for postgres query history * fix(status): report query history readiness * fix(ingest): remove legacy public guidance * fix(ingest): polish foreground retry copy * docs(examples): use unified query history wording * chore(ingest): finish public query history cleanup * docs: add unified ingest v1 query history 
status cleanup plan * test(docs): cover unified ingest public docs * docs: align ingest CLI reference with unified UX * docs: update context build guides for unified ingest * docs: update setup and primary source ingest wording * docs: stop advertising adapter-backed example ingest * docs: close unified ingest public docs gaps * docs: add unified ingest v1 docs site closure plan * fix: render unified ingest foreground warnings * fix: explain query history schema order * fix: add public ingest retry guidance * fix: align setup next steps with unified ingest * fix: remove scan wording from demo progress * test: verify unified ingest ux closure * docs: add unified ingest v1 foreground and retry closure plan * fix(cli): preserve query-history pull config in public ingest * fix(cli): omit hidden commands from docs command tree * test(cli): close unified ingest final public surface checks * docs: add unified ingest v1 final public surface closure plan * fix(cli): use public source labels in ingest reports * fix(cli): suppress low-level public ingest output * test(cli): verify unified ingest public plain output * docs: add unified ingest v1 public plain output closure plan * fix(cli): add public ingest copy sanitizers * fix(cli): sanitize public ingest progress copy * fix(cli): rename setup schema scope prompt * docs(plan): add progress copy closure; test: align setup back-nav fixture Adds the iter9 plan and updates the setup back-navigation test fixture to pass disableQueryHistory plus listSchemas/listTables stubs that the unified ingest setup step now requires. 
* docs(plan): add final ux labels plan with narrowed label scans * fix(cli): aggregate unsupported query-history warnings * fix(cli): align setup database labels * test(cli): fix setup database test type-check * fix(cli): remove primary-source wording from setup output * test(cli): verify unified ingest setup closure * docs(plan): add unified ingest v1 verification copy closure plan * fix(cli): remove top-level scan command * fix(cli): remove legacy ingest and wiki commands * Merge scan into ingest flow * feat(cli): split ingest progress into per-phase rows, rename work units to tasks Each database target in the unified ingest dashboard now renders one row per real subprocess (Schema, then Query history when enabled) instead of a single combined bar. Each phase has its own monotonic 0-100% bar so the progress never snaps back to zero when historic-sql starts after scan completes. Completed phases keep their final bar, summary, and elapsed time visible as an inline audit trail; queued and skipped phases are shown explicitly. Also rename user-facing "work units" / "Failed work units" to "tasks" / "Failed tasks" in ingest output and parseIngestSummary. The parser still accepts the legacy "Work units:" wording in captured output for backward compat. Internal memory-flow event names and type fields are left alone. * Fix test harness failures * Fix CI smoke checks --------- Co-authored-by: Andrey Avtomonov <7889985+andreybavt@users.noreply.github.com> 2026-05-14 01:43:06 +02:00			`getMarkdown: async () => normalizeMarkdown(await readDocsPageMarkdown(page.slugs)),`
feat(docs-site): add agent-readable docs routes 2026-05-11 16:40:34 -07:00			`} satisfies LlmDocsPage;`
			`}`

			`function normalizeMarkdown(markdown: string) {`
docs(docs-site): make core guides agent-friendly 2026-05-11 16:42:08 -07:00			`return markdown`
			`.trim()`
			`.replace(/^---\n[\s\S]*?\n---\n?/, "")`
			`.trim()`
			`.replace(/\n{3,}/g, "\n\n");`
feat(docs-site): add agent-readable docs routes 2026-05-11 16:40:34 -07:00			`}`
docs(docs-site): add AI resources for agents 2026-05-11 17:20:11 -07:00
			`function buildPageIndex(pages: LlmDocsPage[]) {`
			`const grouped = new Map<string, LlmDocsPage[]>();`

			`for (const page of pages) {`
			`const category = page.slug[0] ?? "general";`
			`grouped.set(category, [...(grouped.get(category) ?? []), page]);`
			`}`

			`return [...grouped.entries()]`
			`.map(([category, categoryPages]) => {`
			`const links = categoryPages`
			`.map((page) => {`
			const description = page.description ? `: ${page.description}` : "";
docs(docs-site): update AI resource URLs and nav order 2026-05-11 20:03:53 -07:00			return `- [${page.title}](${absoluteUrl(page.markdownUrl)})${description}`;
docs(docs-site): add AI resources for agents 2026-05-11 17:20:11 -07:00			`})`
			`.join("\n");`

			return `### ${formatCategoryName(category)}

			${links}`;
			`})`
			`.join("\n\n");`
			`}`

docs(docs-site): update AI resource URLs and nav order 2026-05-11 20:03:53 -07:00			`function absoluteUrl(path: string) {`
			return `${siteOrigin}${path}`;
			`}`

docs(docs-site): add AI resources for agents 2026-05-11 17:20:11 -07:00			`function formatCategoryName(category: string) {`
			`const labels: Record<string, string> = {`
			`"ai-resources": "AI Resources",`
			`"cli-reference": "CLI Reference",`
			`};`

			`if (labels[category]) {`
			`return labels[category];`
			`}`

			`return category`
			`.split("-")`
			`.map((word) => word.charAt(0).toUpperCase() + word.slice(1))`
			`.join(" ");`
			`}`