omnigraph/scripts/check-agents-md.sh

117 lines
2.9 KiB
Bash
Raw Permalink Normal View History

#!/usr/bin/env bash
# Verify that AGENTS.md and the docs audience indexes stay in sync.
#
# Checks:
#   1. Every docs/ link from AGENTS.md, docs/user/index.md, and
#      docs/dev/index.md exists.
#   2. Every canonical docs file is discoverable from those indexes.
#
# Release notes are represented by the docs/releases/ directory entry instead
# of requiring every per-version release note to be linked individually.
# Files under docs/internal/ are excluded from the set of canonical docs, so
# they never need to be linked from an index.
# Strict mode: abort on command failure, on unset variables, and on a failure
# in any stage of a pipeline.
set -euo pipefail

# The repository root is the parent of the directory holding this script.
# Everything below uses paths relative to it.
script_dir="$(dirname "$0")"
repo_root="$(cd "$script_dir/.." && pwd)"
cd "$repo_root"

# The index documents whose links are checked.
index_files=(AGENTS.md docs/user/index.md docs/dev/index.md)

# Stop at the first index file that is missing.
for index_file in "${index_files[@]}"; do
  [[ -f "$index_file" ]] && continue
  echo "error: $index_file not found" >&2
  exit 1
done
#######################################
# Print a normalized form of a path using Python's os.path.normpath, with
# the platform path separator replaced by "/".
# Arguments: $1 - path to normalize
# Outputs:   the normalized path on stdout
#######################################
normalize_path() {
  local raw_path=$1
  # stdin is redirected from /dev/null so the interpreter never inherits the
  # input of a surrounding `while read` loop.
  python3 -c '
import os
import sys

normalized = os.path.normpath(sys.argv[1])
print(normalized.replace(os.sep, "/"))
' "$raw_path" </dev/null
}
# Build the list of canonical docs that must be linked from an index: every
# Markdown file under docs/, with two exclusions.
#   - docs/releases/*: represented by the single "docs/releases/" directory
#     entry appended below, so per-version notes need no individual link.
#   - docs/internal/*: local-only working notes that are not part of the
#     published doc tree and have no audience index to be linked from.
canonical=()
while IFS= read -r doc_path; do
  canonical+=("$doc_path")
done < <(find docs -type f -name '*.md' ! -path 'docs/releases/*' ! -path 'docs/internal/*' | sort)

# The releases directory counts as one canonical entry. The trailing slash
# marks it as a directory.
if [[ -d docs/releases ]]; then
  canonical+=("docs/releases/")
fi
# Every docs path referenced from the index files, in normalized form.
linked=()

#######################################
# Print the destination of every inline Markdown link `[text](dest)` in a
# file, one per line. An optional link title (`[text](dest "Title")`) is
# dropped so that only the destination remains; a CommonMark destination
# written without angle brackets cannot contain whitespace, so cutting at the
# first whitespace is safe.
# Arguments: $1 - file to scan
# Outputs:   one raw link destination per line (nothing if there are none)
#######################################
markdown_link_targets() {
  # grep exits 1 when nothing matches, which would fail the pipeline under
  # `pipefail`. A file without links is not an error, hence `|| true`.
  grep -oE '\[[^]]+\]\([^)]+\)' "$1" \
    | sed -E \
      -e 's/.*\(([^)]+)\).*/\1/' \
      -e 's/^[[:space:]]+//' \
      -e 's/[[:space:]].*$//' \
    || true
}

#######################################
# Print every agent import directive in a file: a line that starts with
# `@docs/`, up to the first whitespace. The leading `@` is kept.
# Arguments: $1 - file to scan
# Outputs:   one directive per line (nothing if there are none)
#######################################
agent_import_targets() {
  # `|| true`: no directive in the file is not an error.
  grep -oE '^@docs/[^[:space:]]+' "$1" || true
}

for index_file in "${index_files[@]}"; do
  base_dir="$(dirname "$index_file")"

  # Markdown links.
  while IFS= read -r raw_link; do
    link="${raw_link%%#*}"                                  # drop "#fragment"
    [[ -z "$link" ]] && continue                            # pure in-page anchor
    [[ "$link" =~ ^[a-zA-Z][a-zA-Z0-9+.-]*: ]] && continue  # has a URL scheme
    [[ "$link" == /* ]] && continue                         # absolute path
    if [[ "$link" == docs/* ]]; then
      # Links that already start with docs/ are used as written rather than
      # being joined to the index file's directory.
      normalized="$(normalize_path "$link")"
    else
      normalized="$(normalize_path "$base_dir/$link")"
    fi
    if [[ "$link" == */ ]]; then
      # Keep exactly one trailing slash so a directory link stays
      # recognizable as a directory.
      normalized="${normalized%/}/"
    fi
    linked+=("$normalized")
  done < <(markdown_link_targets "$index_file")

  # Agent import directives (`@docs/...`); strip the `@` before normalizing.
  while IFS= read -r raw_link; do
    linked+=("$(normalize_path "${raw_link#@}")")
  done < <(agent_import_targets "$index_file")
done
# Sort and de-duplicate the collected link targets in place.
#
# The emptiness guard matters: with no links at all, `printf '%s\n'` with zero
# arguments still prints one empty line, which would register a phantom ""
# link and later be reported as a missing file. On Bash older than 4.4,
# expanding an empty array under `set -u` aborts the script outright.
deduped=()
if (( ${#linked[@]} > 0 )); then
  while IFS= read -r line; do
    deduped+=("$line")
  # LC_ALL=C makes the comparison byte-wise, so two distinct paths are never
  # merged because the locale collates them as equal.
  done < <(printf '%s\n' "${linked[@]}" | LC_ALL=C sort -u)
  linked=("${deduped[@]}")
fi
# Check 1: every linked path must exist. An entry ending in "/" must be a
# directory; anything else must be a regular file. `fail` records whether any
# problem was found so that all problems are reported before exiting.
fail=0
for target in "${linked[@]}"; do
  case "$target" in
    */)
      [[ -d "$target" ]] && continue
      echo "error: docs index links to missing directory: $target" >&2
      ;;
    *)
      [[ -f "$target" ]] && continue
      echo "error: docs index links to missing file: $target" >&2
      ;;
  esac
  fail=1
done
#######################################
# Tell whether a path is one of the collected link targets.
# Globals:   linked (read)
# Arguments: $1 - path to look for (compared literally)
# Returns:   0 if present in `linked`, 1 otherwise
#######################################
is_linked() {
  local wanted=$1 candidate
  for candidate in "${linked[@]}"; do
    [[ "$candidate" == "$wanted" ]] && return 0
  done
  return 1
}

# Check 2: every canonical doc must be linked from at least one index.
for doc in "${canonical[@]}"; do
  is_linked "$doc" && continue
  echo "error: doc not linked from AGENTS.md or audience indexes: $doc" >&2
  fail=1
done
# Final verdict: print a one-line summary on success, or a hint on how to fix
# the indexes and a non-zero exit status when any check failed.
if (( fail == 0 )); then
  echo "AGENTS.md ↔ docs indexes OK (${#linked[@]} links, ${#canonical[@]} docs)."
else
  echo >&2
  echo "AGENTS.md / docs indexes are out of sync. Update AGENTS.md, docs/user/index.md, or docs/dev/index.md." >&2
  exit 1
fi