diff --git a/packages/context/skills/_shared/identifier-verification.md b/packages/context/skills/_shared/identifier-verification.md new file mode 100644 index 00000000..21f1da68 --- /dev/null +++ b/packages/context/skills/_shared/identifier-verification.md @@ -0,0 +1,27 @@ +## Identifier Verification Protocol + +Before writing a wiki page or SL source on any topic: + +1. `discover_data({query: "<topic>"})` - see what wikis, SL sources, and raw + tables already exist. Prefer updating existing pages over creating new ones. + +Before emitting any `schema.table` or `schema.table.column` into a wiki body, +SL source, `tables:` frontmatter, `sl_refs`, or `emit_unmapped_fallback`: + +2. `entity_details({connectionName, targets: [{display: "<schema>.<table>"}]})` - + confirm the identifier resolves; inspect native types, FK/PK, and + sampleValues. +3. For literal values from the source, such as status codes or plan tiers, + check whether they appear in `entity_details` sampleValues for the relevant + column. If sampleValues is short or the sample may have missed real values, + run a `sql_execution` probe: + `SELECT DISTINCT <column> FROM <schema>.<table> LIMIT 50`. +4. If the candidate identifier still does not resolve, do one of: + - Use `sql_execution` with `SELECT 1 FROM <schema>.<table> LIMIT 0`. If it errors, the + identifier is fictional. + - Wrap the identifier in `[unverified - from <raw_path>]` in the wiki body, + citing the exact raw path that mentioned it. + - When recording `emit_unmapped_fallback` with `no_physical_table`, include + the failing probe error in `clarification`. +5. Never copy `<schema>.<table>` placeholder strings from these instructions + into output. 
diff --git a/packages/context/skills/dbt_ingest/SKILL.md b/packages/context/skills/dbt_ingest/SKILL.md index 135dd2e5..4d5b54c4 100644 --- a/packages/context/skills/dbt_ingest/SKILL.md +++ b/packages/context/skills/dbt_ingest/SKILL.md @@ -12,16 +12,16 @@ Use this skill for **uploaded** dbt projects (`dbt_project.yml` at stage root, ` | dbt | KTX | Notes | |-----|--------|--------| -| `models:` entry with `columns:` | **Overlay** on the manifest table with the same name (after `wiki_sl_search` / `sl_describe_table`) | One SL source per physical table; model name may differ from DB name — resolve with `read_raw_file` + warehouse context. | +| `models:` entry with `columns:` | **Overlay** on the manifest table with the same name (after `discover_data` / `entity_details`) | One SL source per physical table; model name may differ from DB name — resolve with `read_raw_file` + warehouse context. | | `sources:` → `tables:` | Same as models; use `identifier` when present instead of logical `name`. | Schema + name must match how the connection sees tables. | | Column `description` | `descriptions.user` or merged `descriptions` map on the column | Do not overwrite `dbt` description keys from sync. | | `data_tests: not_null` / `unique` | Short hint in column `descriptions` or notes: “dbt: not null”, “dbt: unique” | Full structured metadata lands in manifest via **sync**; the skill keeps bundle-time SL text useful for the agent. | -| `accepted_values` | Add a **brief** line in the column description: allowed values (truncate long lists) | Also mention enum-like use in `wiki_sl_search` / filters. | -| `relationships` | Add or confirm `joins:` on the overlay **only** when `to` resolves to a real table via `read_raw_file` + `wiki_sl_search` / `sl_describe_table` | If the ref cannot be resolved, capture the intent in a wiki page instead. 
| +| `accepted_values` | Add a **brief** line in the column description: allowed values (truncate long lists) | Also mention enum-like use in `discover_data` / filters. | +| `relationships` | Add or confirm `joins:` on the overlay **only** when `to` resolves to a real table via `read_raw_file` + `discover_data` / `entity_details` | If the ref cannot be resolved, capture the intent in a wiki page instead. | ## Physical schema grounding -dbt YAML is documentation and test metadata; it is not permission to invent physical columns. Before writing any table-backed SL source, confirm the real warehouse shape with `wiki_sl_search`, `sl_discover`, or `sl_describe_table` and use only confirmed column names in `columns:`, `grain:`, `joins:`, `segments:`, and `measures[].expr`. +dbt YAML is documentation and test metadata; it is not permission to invent physical columns. Before writing any table-backed SL source, confirm the real warehouse shape with `discover_data`, `sl_discover`, or `entity_details` and use only confirmed column names in `columns:`, `grain:`, `joins:`, `segments:`, and `measures[].expr`. For dbt context-source ingest, the dbt connection is usually not the warehouse connection. Call `sl_discover` without `connectionId` first, then write overlays to the connection that owns the matching manifest-backed source (for example `postgres-warehouse`), not to the dbt connection (for example `dbt-main`). If no matching manifest-backed source is visible on any warehouse connection, do not call `sl_write_source`; record `emit_unmapped_fallback` and keep the fact wiki-only. @@ -31,6 +31,34 @@ Include `rawPaths` on every `wiki_write`, `sl_write_source`, and `sl_edit_source After every `sl_write_source`, call `sl_validate`. A validation error saying a declared column or measure reference is absent from the physical table is a hard stop: re-read the warehouse-backed source and rewrite with confirmed names, or remove the invalid SL fields. 
+## Identifier Verification Protocol + +Before writing a wiki page or SL source on any topic: + +1. `discover_data({query: "<topic>"})` - see what wikis, SL sources, and raw + tables already exist. Prefer updating existing pages over creating new ones. + +Before emitting any `schema.table` or `schema.table.column` into a wiki body, +SL source, `tables:` frontmatter, `sl_refs`, or `emit_unmapped_fallback`: + +2. `entity_details({connectionName, targets: [{display: "<schema>.<table>"}]})` - + confirm the identifier resolves; inspect native types, FK/PK, and + sampleValues. +3. For literal values from the source, such as status codes or plan tiers, + check whether they appear in `entity_details` sampleValues for the relevant + column. If sampleValues is short or the sample may have missed real values, + run a `sql_execution` probe: + `SELECT DISTINCT <column> FROM <schema>.<table> LIMIT 50`. +4. If the candidate identifier still does not resolve, do one of: + - Use `sql_execution` with `SELECT 1 FROM <schema>.<table> LIMIT 0`. If it errors, the + identifier is fictional. + - Wrap the identifier in `[unverified - from <raw_path>]` in the wiki body, + citing the exact raw path that mentioned it. + - When recording `emit_unmapped_fallback` with `no_physical_table`, include + the failing probe error in `clarification`. +5. Never copy `<schema>.<table>
` placeholder strings from these instructions + into output. + ## 1.1 test hints (descriptions / meta) When YAML shows `accepted_values` or `not_null`, add **short** hints into `columns[].descriptions` (e.g. under `user`) or freeform column notes so chat and validation see intent before the next git sync refreshes `constraints` / `enum_values` in `_schema`. Keep hints under a few words when possible. diff --git a/packages/context/skills/historic_sql_patterns/SKILL.md b/packages/context/skills/historic_sql_patterns/SKILL.md index 33eb6fe0..aaf7a26c 100644 --- a/packages/context/skills/historic_sql_patterns/SKILL.md +++ b/packages/context/skills/historic_sql_patterns/SKILL.md @@ -18,6 +18,37 @@ Use this skill when the WorkUnit raw file is a `patterns-input/part-0001.json` s 6. Set each evidence object's `rawPath` to the exact raw file path read in step 3. 7. Stop after all pattern evidence has been emitted. +Every join column mentioned in pattern descriptions must be verified via +entity_details for both sides of the join. + +## Identifier Verification Protocol + +Before writing a wiki page or SL source on any topic: + +1. `discover_data({query: ""})` - see what wikis, SL sources, and raw + tables already exist. Prefer updating existing pages over creating new ones. + +Before emitting any `schema.table` or `schema.table.column` into a wiki body, +SL source, `tables:` frontmatter, `sl_refs`, or `emit_unmapped_fallback`: + +2. `entity_details({connectionName, targets: [{display: ""}]})` - + confirm the identifier resolves; inspect native types, FK/PK, and + sampleValues. +3. For literal values from the source, such as status codes or plan tiers, + check whether they appear in `entity_details` sampleValues for the relevant + column. If sampleValues is short or the sample may have missed real values, + run a `sql_execution` probe: + `SELECT DISTINCT FROM LIMIT 50`. +4. 
If the candidate identifier still does not resolve, do one of: + - Use `sql_execution` with `SELECT 1 FROM <schema>.<table> LIMIT 0`. If it errors, the + identifier is fictional. + - Wrap the identifier in `[unverified - from <raw_path>]` in the wiki body, + citing the exact raw path that mentioned it. + - When recording `emit_unmapped_fallback` with `no_physical_table`, include + the failing probe error in `clarification`. +5. Never copy `<schema>.<table>
` placeholder strings from these instructions + into output. + ## Evidence Shape Each call to `emit_historic_sql_evidence` must use this shape: diff --git a/packages/context/skills/historic_sql_table_digest/SKILL.md b/packages/context/skills/historic_sql_table_digest/SKILL.md index 34e49d27..669b3eec 100644 --- a/packages/context/skills/historic_sql_table_digest/SKILL.md +++ b/packages/context/skills/historic_sql_table_digest/SKILL.md @@ -17,6 +17,14 @@ Use this skill when the WorkUnit raw file is one `tables/..json` f 5. Call `emit_historic_sql_evidence` exactly once with `kind: "table_usage"`. 6. Stop after the evidence tool succeeds. +## Identifier Verification Protocol + +Only mention columns visible in the table's scan record. Use +`entity_details({connectionName, targets: [{display: ""}]})` if +the table or column attribution is uncertain. Do not infer join columns or +filters from neighboring SQL unless the scan record confirms the column exists +on the named table. + ## Evidence Shape Call `emit_historic_sql_evidence` with this shape: diff --git a/packages/context/skills/knowledge_capture/SKILL.md b/packages/context/skills/knowledge_capture/SKILL.md index 1e6a8f6c..e514e780 100644 --- a/packages/context/skills/knowledge_capture/SKILL.md +++ b/packages/context/skills/knowledge_capture/SKILL.md @@ -40,6 +40,8 @@ If nothing is worth capturing, respond without calling any tool. 1. Read the wiki index (provided in the prompt) and decide whether the turn introduces durable knowledge. 2. **Before writing**, search for related content so cross-references are accurate: + - `discover_data` first when a page relates to data or SL concepts — find + existing wiki pages, SL sources, and raw warehouse schema together. - `wiki_search` with the topic — find related wiki pages to populate `refs`. 
- `sl_discover` with the concept — if the page defines a metric (revenue, churn, retention, LTV, ARR, MRR, CAC, attribution, etc.), find matching SL sources or measures to populate `sl_refs`. If no matches, pass `sl_refs: []` so future readers know you checked. 3. If updating an existing page, `wiki_read` it first. Use the returned `structured.content` or markdown body as the exact stored text for targeted replacements; current tags, refs, and sl_refs are returned in structured metadata. @@ -48,6 +50,34 @@ If nothing is worth capturing, respond without calling any tool. For bundle/external ingest, include `rawPaths` on every `wiki_write`/`wiki_remove` call with only the raw files that directly support that wiki action. This keeps ingest provenance tied to the actual source file, not every file in the WorkUnit. +## Identifier Verification Protocol + +Before writing a wiki page or SL source on any topic: + +1. `discover_data({query: ""})` - see what wikis, SL sources, and raw + tables already exist. Prefer updating existing pages over creating new ones. + +Before emitting any `schema.table` or `schema.table.column` into a wiki body, +SL source, `tables:` frontmatter, `sl_refs`, or `emit_unmapped_fallback`: + +2. `entity_details({connectionName, targets: [{display: ""}]})` - + confirm the identifier resolves; inspect native types, FK/PK, and + sampleValues. +3. For literal values from the source, such as status codes or plan tiers, + check whether they appear in `entity_details` sampleValues for the relevant + column. If sampleValues is short or the sample may have missed real values, + run a `sql_execution` probe: + `SELECT DISTINCT FROM LIMIT 50`. +4. If the candidate identifier still does not resolve, do one of: + - Use `sql_execution` with `SELECT 1 FROM LIMIT 0`. If it errors, the + identifier is fictional. + - Wrap the identifier in `[unverified - from ]` in the wiki body, + citing the exact raw path that mentioned it. 
+ - When recording `emit_unmapped_fallback` with `no_physical_table`, include + the failing probe error in `clarification`. +5. Never copy `<schema>.<table>
` placeholder strings from these instructions + into output. + ## Keys, summaries, and content - **Keys** are short kebab-case topic identifiers: `leads-source-filter`, `revenue-definition`, `churn-calculation`. No namespacing, no prefixes. diff --git a/packages/context/skills/live_database_ingest/SKILL.md b/packages/context/skills/live_database_ingest/SKILL.md index 9db52484..0b9074e9 100644 --- a/packages/context/skills/live_database_ingest/SKILL.md +++ b/packages/context/skills/live_database_ingest/SKILL.md @@ -24,6 +24,37 @@ Use this skill when the ingest work unit contains raw files under or column comments. 9. Run `sl_validate` for the table source before the work unit completes. +Sample values come from the scan record; do not invent values not present in +relationship-profile.json. + +## Identifier Verification Protocol + +Before writing a wiki page or SL source on any topic: + +1. `discover_data({query: ""})` - see what wikis, SL sources, and raw + tables already exist. Prefer updating existing pages over creating new ones. + +Before emitting any `schema.table` or `schema.table.column` into a wiki body, +SL source, `tables:` frontmatter, `sl_refs`, or `emit_unmapped_fallback`: + +2. `entity_details({connectionName, targets: [{display: ""}]})` - + confirm the identifier resolves; inspect native types, FK/PK, and + sampleValues. +3. For literal values from the source, such as status codes or plan tiers, + check whether they appear in `entity_details` sampleValues for the relevant + column. If sampleValues is short or the sample may have missed real values, + run a `sql_execution` probe: + `SELECT DISTINCT FROM LIMIT 50`. +4. If the candidate identifier still does not resolve, do one of: + - Use `sql_execution` with `SELECT 1 FROM LIMIT 0`. If it errors, the + identifier is fictional. + - Wrap the identifier in `[unverified - from ]` in the wiki body, + citing the exact raw path that mentioned it. 
+ - When recording `emit_unmapped_fallback` with `no_physical_table`, include + the failing probe error in `clarification`. +5. Never copy `<schema>.<table>
` placeholder strings from these instructions + into output. + ## Source shape For a raw table with this shape: diff --git a/packages/context/skills/looker_ingest/SKILL.md b/packages/context/skills/looker_ingest/SKILL.md index 462a5910..87dfe1b7 100644 --- a/packages/context/skills/looker_ingest/SKILL.md +++ b/packages/context/skills/looker_ingest/SKILL.md @@ -21,6 +21,37 @@ Looker runtime ingest turns API-staged dashboards, Looks, and explores into dura 9. Write SL from Looker runtime evidence only through the staged warehouse target contract. For explores and inherited dashboard/Look queries, branch on `targetTable.ok`; when it is true, write on `targetWarehouseConnectionId` and use `targetTable.canonicalTable` as `source.table`. When it is false or missing, write wiki knowledge candidates and record `emit_unmapped_fallback` with the staged reason. 10. Run `sl_validate` after every SL write. If validation fails, fix the source or roll it back before the WorkUnit ends. +For every Looker field reference, call entity_details on the underlying +schema.table.column before promoting it to sl_refs or quoting it in wiki body. + +## Identifier Verification Protocol + +Before writing a wiki page or SL source on any topic: + +1. `discover_data({query: ""})` - see what wikis, SL sources, and raw + tables already exist. Prefer updating existing pages over creating new ones. + +Before emitting any `schema.table` or `schema.table.column` into a wiki body, +SL source, `tables:` frontmatter, `sl_refs`, or `emit_unmapped_fallback`: + +2. `entity_details({connectionName, targets: [{display: ""}]})` - + confirm the identifier resolves; inspect native types, FK/PK, and + sampleValues. +3. For literal values from the source, such as status codes or plan tiers, + check whether they appear in `entity_details` sampleValues for the relevant + column. If sampleValues is short or the sample may have missed real values, + run a `sql_execution` probe: + `SELECT DISTINCT FROM LIMIT 50`. +4. 
If the candidate identifier still does not resolve, do one of: + - Use `sql_execution` with `SELECT 1 FROM <schema>.<table> LIMIT 0`. If it errors, the + identifier is fictional. + - Wrap the identifier in `[unverified - from <raw_path>]` in the wiki body, + citing the exact raw path that mentioned it. + - When recording `emit_unmapped_fallback` with `no_physical_table`, include + the failing probe error in `clarification`. +5. Never copy `<schema>.<table>
` placeholder strings from these instructions + into output. + ## Explore WorkUnits Explore WUs have raw files like `explores//.json` and usually depend on `lookml_models.json`. diff --git a/packages/context/skills/lookml_ingest/SKILL.md b/packages/context/skills/lookml_ingest/SKILL.md index 18b43f3e..44725699 100644 --- a/packages/context/skills/lookml_ingest/SKILL.md +++ b/packages/context/skills/lookml_ingest/SKILL.md @@ -51,6 +51,37 @@ LookML's `dimension_group: date { type: time; timeframes: [raw, date, week, mont A prior replay hallucinated `date_date`, `date_week` into `sql:`, `columns:`, and `grain:` across 4+ standalones; every measure on each affected source returned `400 Unrecognized name: date_date` at query time. Preventable. +Verify each sql_table_name from the LookML view with entity_details before +mapping to an SL source. + +## Identifier Verification Protocol + +Before writing a wiki page or SL source on any topic: + +1. `discover_data({query: ""})` - see what wikis, SL sources, and raw + tables already exist. Prefer updating existing pages over creating new ones. + +Before emitting any `schema.table` or `schema.table.column` into a wiki body, +SL source, `tables:` frontmatter, `sl_refs`, or `emit_unmapped_fallback`: + +2. `entity_details({connectionName, targets: [{display: ""}]})` - + confirm the identifier resolves; inspect native types, FK/PK, and + sampleValues. +3. For literal values from the source, such as status codes or plan tiers, + check whether they appear in `entity_details` sampleValues for the relevant + column. If sampleValues is short or the sample may have missed real values, + run a `sql_execution` probe: + `SELECT DISTINCT FROM LIMIT 50`. +4. If the candidate identifier still does not resolve, do one of: + - Use `sql_execution` with `SELECT 1 FROM LIMIT 0`. If it errors, the + identifier is fictional. + - Wrap the identifier in `[unverified - from ]` in the wiki body, + citing the exact raw path that mentioned it. 
+ - When recording `emit_unmapped_fallback` with `no_physical_table`, include + the failing probe error in `clarification`. +5. Never copy `<schema>.<table>
` placeholder strings from these instructions + into output. + **Required flow before writing any overlay or standalone**: 1. Call `sl_discover()` for each base table you're about to touch. That returns the real columns. diff --git a/packages/context/skills/metabase_ingest/SKILL.md b/packages/context/skills/metabase_ingest/SKILL.md index 061760bf..3b2535e4 100644 --- a/packages/context/skills/metabase_ingest/SKILL.md +++ b/packages/context/skills/metabase_ingest/SKILL.md @@ -44,6 +44,37 @@ Use `resultMetadata` to: - `lastRunAt`: ISO timestamp of the card's last execution. If null or very old, the card may be dead; prefer skipping over creating a source. - `dashboardCount`: number of dashboards referencing the card. Cards with `dashboardCount: 0` and a stale `lastRunAt` are strong skip signals. +Before writing a wiki page derived from a Metabase question SQL, verify each +schema.table.column mentioned with entity_details. + +## Identifier Verification Protocol + +Before writing a wiki page or SL source on any topic: + +1. `discover_data({query: ""})` - see what wikis, SL sources, and raw + tables already exist. Prefer updating existing pages over creating new ones. + +Before emitting any `schema.table` or `schema.table.column` into a wiki body, +SL source, `tables:` frontmatter, `sl_refs`, or `emit_unmapped_fallback`: + +2. `entity_details({connectionName, targets: [{display: ""}]})` - + confirm the identifier resolves; inspect native types, FK/PK, and + sampleValues. +3. For literal values from the source, such as status codes or plan tiers, + check whether they appear in `entity_details` sampleValues for the relevant + column. If sampleValues is short or the sample may have missed real values, + run a `sql_execution` probe: + `SELECT DISTINCT FROM LIMIT 50`. +4. If the candidate identifier still does not resolve, do one of: + - Use `sql_execution` with `SELECT 1 FROM LIMIT 0`. If it errors, the + identifier is fictional. 
+ - Wrap the identifier in `[unverified - from <raw_path>]` in the wiki body, + citing the exact raw path that mentioned it. + - When recording `emit_unmapped_fallback` with `no_physical_table`, include + the failing probe error in `clarification`. +5. Never copy `<schema>.<table>
` placeholder strings from these instructions + into output. + ## Decision tree For each card: diff --git a/packages/context/skills/metricflow_ingest/SKILL.md b/packages/context/skills/metricflow_ingest/SKILL.md index 5a24cda8..a24bab06 100644 --- a/packages/context/skills/metricflow_ingest/SKILL.md +++ b/packages/context/skills/metricflow_ingest/SKILL.md @@ -29,6 +29,37 @@ A MetricFlow `semantic_model` maps to an SL source; MetricFlow `measures` map to Type map: MetricFlow `time` to KTX `time`; `categorical` to `string`; `number` to `number`; `boolean` to `boolean`. Follow `expr` over `name` when both differ — `expr` is the physical column. +Verify each MetricFlow model source table with entity_details before producing +the corresponding sl_write_source. + +## Identifier Verification Protocol + +Before writing a wiki page or SL source on any topic: + +1. `discover_data({query: ""})` - see what wikis, SL sources, and raw + tables already exist. Prefer updating existing pages over creating new ones. + +Before emitting any `schema.table` or `schema.table.column` into a wiki body, +SL source, `tables:` frontmatter, `sl_refs`, or `emit_unmapped_fallback`: + +2. `entity_details({connectionName, targets: [{display: ""}]})` - + confirm the identifier resolves; inspect native types, FK/PK, and + sampleValues. +3. For literal values from the source, such as status codes or plan tiers, + check whether they appear in `entity_details` sampleValues for the relevant + column. If sampleValues is short or the sample may have missed real values, + run a `sql_execution` probe: + `SELECT DISTINCT FROM LIMIT 50`. +4. If the candidate identifier still does not resolve, do one of: + - Use `sql_execution` with `SELECT 1 FROM LIMIT 0`. If it errors, the + identifier is fictional. + - Wrap the identifier in `[unverified - from ]` in the wiki body, + citing the exact raw path that mentioned it. 
+ - When recording `emit_unmapped_fallback` with `no_physical_table`, include + the failing probe error in `clarification`. +5. Never copy `<schema>.<table>
` placeholder strings from these instructions + into output. + ## Flattening `extends:` Within one WorkUnit, multiple semantic_models linked by `extends:` are guaranteed to be present (the chunker groups them). Resolve inheritance **before** writing: diff --git a/packages/context/skills/notion_synthesize/SKILL.md b/packages/context/skills/notion_synthesize/SKILL.md index 933acc55..f4bf7f83 100644 --- a/packages/context/skills/notion_synthesize/SKILL.md +++ b/packages/context/skills/notion_synthesize/SKILL.md @@ -67,10 +67,38 @@ Search existing wiki pages for the same `tables:` or `sl_refs:` frontmatter and - Do not create SL sources under the Notion connection just because a page mentions a warehouse, dbt, Looker, or Metabase object. Use the mapped warehouse/source connection after discovery, or emit an unmapped fallback and write wiki-only. - Distinguish fallback reasons precisely: if a non-Notion warehouse/dbt connection exists but `sl_discover` cannot find the named table/source, use `no_physical_table`; reserve `no_connection_mapping` for cases where there is no plausible non-Notion target connection at all. - If `sl_discover` resolves the table/source, do not call `emit_unmapped_fallback` for that table. Use the resolved source for `sl_refs`, overlay edits, or wiki-only documentation. -- When calling `emit_unmapped_fallback`, pass the table or source identifier as `tableRef` (e.g. `tableRef: "orbit_analytics.customer"`) — the tool generates the canonical detail string from the reason code and `tableRef`. Use the optional `clarification` field only to add context that does not contradict the reason. Do not restate the reason in `clarification`. +- When calling `emit_unmapped_fallback`, pass the table or source identifier as `tableRef` (e.g. `tableRef: ".
"`) — the tool generates the canonical detail string from the reason code and `tableRef`. Use the optional `clarification` field only to add context that does not contradict the reason. Do not restate the reason in `clarification`. + +## Identifier Verification Protocol + +Before writing a wiki page or SL source on any topic: + +1. `discover_data({query: ""})` - see what wikis, SL sources, and raw + tables already exist. Prefer updating existing pages over creating new ones. + +Before emitting any `schema.table` or `schema.table.column` into a wiki body, +SL source, `tables:` frontmatter, `sl_refs`, or `emit_unmapped_fallback`: + +2. `entity_details({connectionName, targets: [{display: ""}]})` - + confirm the identifier resolves; inspect native types, FK/PK, and + sampleValues. +3. For literal values from the source, such as status codes or plan tiers, + check whether they appear in `entity_details` sampleValues for the relevant + column. If sampleValues is short or the sample may have missed real values, + run a `sql_execution` probe: + `SELECT DISTINCT FROM LIMIT 50`. +4. If the candidate identifier still does not resolve, do one of: + - Use `sql_execution` with `SELECT 1 FROM LIMIT 0`. If it errors, the + identifier is fictional. + - Wrap the identifier in `[unverified - from ]` in the wiki body, + citing the exact raw path that mentioned it. + - When recording `emit_unmapped_fallback` with `no_physical_table`, include + the failing probe error in `clarification`. +5. Never copy `.
` placeholder strings from these instructions + into output. ## Tools -Allowed: `read_raw_file`, `read_raw_span`, `wiki_search`, `wiki_read`, `wiki_write`, `sl_discover`, `sl_read_source`, `sl_write_source`, `sl_edit_source`, `sl_validate`, `context_evidence_search`, `context_evidence_read`, `context_evidence_neighbors`, `emit_unmapped_fallback`, `eviction_list`, `context_eviction_decision_write`. +Allowed: `read_raw_file`, `read_raw_span`, `wiki_search`, `wiki_read`, `wiki_write`, `discover_data`, `entity_details`, `sql_execution`, `sl_discover`, `sl_read_source`, `sl_write_source`, `sl_edit_source`, `sl_validate`, `context_evidence_search`, `context_evidence_read`, `context_evidence_neighbors`, `emit_unmapped_fallback`, `eviction_list`, `context_eviction_decision_write`. Not allowed: `context_candidate_write`, `context_candidate_mark`. diff --git a/packages/context/skills/sl/SKILL.md b/packages/context/skills/sl/SKILL.md index 2719b9d4..9cdb8b34 100644 --- a/packages/context/skills/sl/SKILL.md +++ b/packages/context/skills/sl/SKILL.md @@ -13,6 +13,10 @@ This skill covers two parts: Capture (when and how to add new patterns to the SL) is a separate concern handled by the memory-agent — see the `sl_capture` skill if you are running in capture mode. The research agent **reads** and **queries** the SL via the tools described here; it does not write to it. +For capture-time identifier verification, load `sl_capture`. Synthesis writer +skills must verify warehouse identifiers with `discover_data`, +`entity_details`, and `sql_execution` before emitting table or column names. 
+ --- ## Part 1 — Schema reference diff --git a/packages/context/skills/sl_capture/SKILL.md b/packages/context/skills/sl_capture/SKILL.md index ffb1780d..abb84170 100644 --- a/packages/context/skills/sl_capture/SKILL.md +++ b/packages/context/skills/sl_capture/SKILL.md @@ -174,6 +174,37 @@ Wiki-only is correct when the user is documenting *about* the measure (definition in business terms, owner, policy, glossary, examples of when to use it) without changing its SQL expression or filters. +Before sl_write_source, call entity_details on the target table to confirm +column names and types match the YAML being written. + +## Identifier Verification Protocol + +Before writing a wiki page or SL source on any topic: + +1. `discover_data({query: ""})` - see what wikis, SL sources, and raw + tables already exist. Prefer updating existing pages over creating new ones. + +Before emitting any `schema.table` or `schema.table.column` into a wiki body, +SL source, `tables:` frontmatter, `sl_refs`, or `emit_unmapped_fallback`: + +2. `entity_details({connectionName, targets: [{display: ""}]})` - + confirm the identifier resolves; inspect native types, FK/PK, and + sampleValues. +3. For literal values from the source, such as status codes or plan tiers, + check whether they appear in `entity_details` sampleValues for the relevant + column. If sampleValues is short or the sample may have missed real values, + run a `sql_execution` probe: + `SELECT DISTINCT FROM LIMIT 50`. +4. If the candidate identifier still does not resolve, do one of: + - Use `sql_execution` with `SELECT 1 FROM LIMIT 0`. If it errors, the + identifier is fictional. + - Wrap the identifier in `[unverified - from ]` in the wiki body, + citing the exact raw path that mentioned it. + - When recording `emit_unmapped_fallback` with `no_physical_table`, include + the failing probe error in `clarification`. +5. Never copy `.
` placeholder strings from these instructions + into output. + ## Tool sequence 1. `sl_discover` — see what source files exist. diff --git a/packages/context/src/ingest/tools/emit-unmapped-fallback.tool.ts b/packages/context/src/ingest/tools/emit-unmapped-fallback.tool.ts index aaba3509..33a8610e 100644 --- a/packages/context/src/ingest/tools/emit-unmapped-fallback.tool.ts +++ b/packages/context/src/ingest/tools/emit-unmapped-fallback.tool.ts @@ -64,7 +64,7 @@ export function createEmitUnmappedFallbackTool(deps: EmitUnmappedFallbackDeps) { tableRef: z .string() .optional() - .describe('The fully-qualified table or source reference that triggered the fallback (e.g. "orbit_analytics.customer"). Used to generate canonical detail text.'), + .describe('The fully-qualified table or source reference that triggered the fallback (e.g. ".
"). Used to generate canonical detail text.'), clarification: z .string() .optional() diff --git a/packages/context/src/sl/tools/sl-warehouse-validation.ts b/packages/context/src/sl/tools/sl-warehouse-validation.ts index f9c5e4fd..a200dad9 100644 --- a/packages/context/src/sl/tools/sl-warehouse-validation.ts +++ b/packages/context/src/sl/tools/sl-warehouse-validation.ts @@ -90,7 +90,7 @@ export async function validateSingleSource( `writing it as-is drops the manifest's columns and joins. ` + `Remove "sql:", "table:", "grain:", "columns:", and "joins:" and keep only ` + `"name:" plus "measures:"/"segments:"/"description:" to write an overlay ` + - `that inherits the manifest schema. Call sl_describe_table to see it first.`, + `that inherits the manifest schema. Call sl_read_source to inspect the existing source first.`, ); return { errors, warnings }; }