feat: add historic sql evidence skills

2026-07-22 11:51:01 +02:00 · 2026-05-11 18:48:31 +02:00 · 2026-05-11 18:48:31 +02:00 · 954426289d
commit 954426289d
parent ef9358beb7
4 changed files with 141 additions and 54 deletions
--- a/packages/context/skills/historic_sql_patterns/SKILL.md
+++ b/packages/context/skills/historic_sql_patterns/SKILL.md
@ -0,0 +1,57 @@
+---
+name: historic_sql_patterns
+description: Identify recurring cross-table historic-SQL analytical intents and emit typed pattern evidence for deterministic wiki projection.
+callers: [memory_agent]
+---
+
+# Historic SQL Patterns
+
+Use this skill when the WorkUnit raw file is `patterns-input.json` from the `historic-sql` adapter.
+
+## Required Workflow
+
+1. Read the WorkUnit notes first.
+2. Call `read_raw_file` for `patterns-input.json`.
+3. Identify recurring analytical intents that span at least two tables and have repeated usage signal.
+4. Emit one `pattern` evidence object per durable cross-table intent by calling `emit_historic_sql_evidence`.
+5. Stop after all pattern evidence has been emitted.
+
+## Evidence Shape
+
+Each call to `emit_historic_sql_evidence` must use this shape:
+
+```json
+{
+  "kind": "pattern",
+  "rawPath": "patterns-input.json",
+  "pattern": {
+    "slug": "order-lifecycle-analysis",
+    "title": "Order Lifecycle Analysis",
+    "narrative": "Analysts compare order statuses with customer segments to understand lifecycle movement.",
+    "definitionSql": "select o.status, count(*) from public.orders o join public.customers c on c.id = o.customer_id group by o.status",
+    "tablesInvolved": ["public.orders", "public.customers"],
+    "slRefs": ["orders", "customers"],
+    "constituentTemplateIds": ["pg:1", "pg:2"]
+  }
+}
+```
+
+The `pattern` object in each call must match `patternOutputSchema`; the `pattern` objects from all calls, taken together, must form a valid `patternsArraySchema` array.
+
+## Pattern Selection Rules
+
+- Only emit patterns that involve two or more tables; single-table intents are out of scope for this skill.
+- Prefer templates with `executionsBucket` at least `10-100` and `distinctUsersBucket` above solo usage.
+- Merge templates into one pattern only when the business intent is the same.
+- Use a stable kebab-case slug based on intent, not a template id.
+- Set `definitionSql` to the clearest representative SQL from a constituent template.
+- Set `slRefs` to source names when the source name is obvious from table names; omit uncertain refs rather than guessing.
+
+## Boundaries
+
+- Do not call wiki_write.
+- Do not call sl_write_source.
+- Do not call sl_edit_source.
+- Do not call context_candidate_write.
+- Do not emit evidence for single-table patterns; they must not become pattern pages.
+- Do not copy credentials, tokens, user emails, or unredacted literals into evidence.
--- a/packages/context/skills/historic_sql_table_digest/SKILL.md
+++ b/packages/context/skills/historic_sql_table_digest/SKILL.md
@ -0,0 +1,61 @@
+---
+name: historic_sql_table_digest
+description: Convert one changed historic-SQL table usage bucket into typed table usage evidence for deterministic _schema projection.
+callers: [memory_agent]
+---
+
+# Historic SQL Table Digest
+
+Use this skill when the WorkUnit raw file is one `tables/<schema>.<name>.json` file from the `historic-sql` adapter.
+
+## Required Workflow
+
+1. Read the WorkUnit notes first.
+2. Call `read_raw_file` for the single `tables/<schema>.<name>.json` raw file.
+3. Read `manifest.json` only if the table JSON omits the dialect or the WorkUnit notes are unclear.
+4. Produce one concise usage narrative for this table from the staged table JSON.
+5. Call `emit_historic_sql_evidence` exactly once with `kind: "table_usage"`.
+6. Stop after the evidence tool succeeds.
+
+## Evidence Shape
+
+Call `emit_historic_sql_evidence` with this shape:
+
+```json
+{
+  "kind": "table_usage",
+  "table": "public.orders",
+  "rawPath": "tables/public.orders.json",
+  "usage": {
+    "narrative": "Orders are repeatedly queried for paid/refunded lifecycle analysis and customer-level rollups.",
+    "frequencyTier": "high",
+    "commonFilters": ["status", "created_at"],
+    "commonGroupBys": ["status"],
+    "commonJoins": [{ "table": "public.customers", "on": ["customer_id"] }],
+    "staleSince": null
+  }
+}
+```
+
+The `usage` object must match `tableUsageOutputSchema`.
+
+## Interpretation Rules
+
+- Treat `columnsByClause.where` as common filters.
+- Treat `columnsByClause.groupBy` as common group-bys.
+- Treat `observedJoins` as common joins.
+- Use `stats.executionsBucket`, `stats.distinctUsersBucket`, and `stats.recencyBucket` to choose `frequencyTier`.
+- Use `frequencyTier: "high"` only when executions and distinct users are both broad.
+- Use `frequencyTier: "mid"` for repeated team usage that is not broad enough for high.
+- Use `frequencyTier: "low"` for low-volume but present usage.
+- Use `frequencyTier: "unused"` only when the table input explicitly says the table is stale or has no recent templates.
+- Keep `narrative` short and concrete.
+
+## Boundaries
+
+- Do not call wiki_write.
+- Do not call sl_write_source.
+- Do not call sl_edit_source.
+- Do not call context_candidate_write.
+- Do not emit more than one table usage evidence object.
+- Do not invent columns, joins, or tables that are absent from the staged JSON.
--- a/packages/context/src/ingest/ingest-runtime-assets.test.ts
+++ b/packages/context/src/ingest/ingest-runtime-assets.test.ts
@ -14,14 +14,14 @@ const adapterSkillNames = [
  'metabase_ingest',
  'metricflow_ingest',
  'notion_synthesize',
-  'historic_sql_ingest',
+  'historic_sql_table_digest',
+  'historic_sql_patterns',
  'ingest_triage',
  'knowledge_capture',
  'sl_capture',
 ] as const;

 const adapterReconcileSkillNames = [
-  'historic_sql_curator',
  'ingest_triage',
  'knowledge_capture',
  'sl_capture',
@ -67,66 +67,34 @@ describe('ingest runtime assets', () => {
    await expect(prompts.loadPrompt('skills/light_extraction')).resolves.toContain('# Light Context Extraction');
  });

-  it('packages historic-SQL WorkUnit skill guidance from KTX assets', async () => {
+  it('packages historic-SQL table digest guidance from KTX assets', async () => {
    const registry = new SkillsRegistryService({ skillsDir });
-    const skills = await registry.listSkills(['historic_sql_ingest'], 'memory_agent');
+    const skills = await registry.listSkills(['historic_sql_table_digest'], 'memory_agent');

-    expect(skills.map((skill) => skill.name)).toEqual(['historic_sql_ingest']);
+    expect(skills.map((skill) => skill.name)).toEqual(['historic_sql_table_digest']);

-    const [skill] = skills;
-    if (!skill) {
-      throw new Error('historic_sql_ingest skill missing');
-    }
-
-    expect(skill.path.startsWith(skillsDir)).toBe(true);
-
-    const body = await readFile(join(skill.path, 'SKILL.md'), 'utf-8');
-    expect(body).toContain('# Historic SQL Ingest');
-    expect(body).toContain('Read exactly one historic-SQL template WorkUnit');
-    expect(body).toContain('metadata.json');
-    expect(body).toContain('page.md');
-    expect(body).toContain('usage.json');
-    expect(body).toContain('manifest.json');
-    expect(body).toContain('wiki_write');
-    expect(body).toContain('key: "queries/<intent_slug>"');
-    expect(body).toContain('"source": "historic-sql"');
-    expect(body).toContain('representative_sql');
-    expect(body).toContain('fingerprints');
-    expect(body).toContain('usage');
-    expect(body).toContain('SL proposal threshold');
-    expect(body).toContain('Do not group sibling templates');
-    expect(body).toContain('Do not copy sample bound_sql');
-    expect(body).not.toContain('store historic-SQL provenance in the markdown body');
+    const body = await readFile(join(skills[0]!.path, 'SKILL.md'), 'utf-8');
+    expect(body).toContain('# Historic SQL Table Digest');
+    expect(body).toContain('tables/<schema>.<name>.json');
+    expect(body).toContain('tableUsageOutputSchema');
+    expect(body).toContain('emit_historic_sql_evidence');
+    expect(body).toContain('Do not call wiki_write');
+    expect(body).toContain('Do not call sl_write_source');
    expect(body).not.toMatch(forbiddenProductPattern());
  });

-  it('packages historic-SQL curator reconcile guidance from KTX assets', async () => {
+  it('packages historic-SQL patterns guidance from KTX assets', async () => {
    const registry = new SkillsRegistryService({ skillsDir });
-    const skills = await registry.listSkills(['historic_sql_curator'], 'memory_agent');
+    const skills = await registry.listSkills(['historic_sql_patterns'], 'memory_agent');

-    expect(skills.map((skill) => skill.name)).toEqual(['historic_sql_curator']);
+    expect(skills.map((skill) => skill.name)).toEqual(['historic_sql_patterns']);

-    const [skill] = skills;
-    if (!skill) {
-      throw new Error('historic_sql_curator skill missing');
-    }
-
-    expect(skill.path.startsWith(skillsDir)).toBe(true);
-
-    const body = await readFile(join(skill.path, 'SKILL.md'), 'utf-8');
-    expect(body).toContain('# Historic SQL Curator');
-    expect(body).toContain('curator pagination');
-    expect(body).toContain('stage_list');
-    expect(body).toContain('stage_diff');
-    expect(body).toContain('read_raw_span');
-    expect(body).toContain('wiki_search');
-    expect(body).toContain('wiki_read');
-    expect(body).toContain('wiki_write');
-    expect(body).toContain('emit_artifact_resolution');
-    expect(body).toContain('emit_eviction_decision');
-    expect(body).toContain('categorical sub-cluster');
-    expect(body).toContain('historic-sql-demoted');
-    expect(body).toContain('Do not call `context_candidate_write`');
+    const body = await readFile(join(skills[0]!.path, 'SKILL.md'), 'utf-8');
+    expect(body).toContain('# Historic SQL Patterns');
+    expect(body).toContain('patterns-input.json');
+    expect(body).toContain('patternsArraySchema');
+    expect(body).toContain('emit_historic_sql_evidence');
+    expect(body).toContain('cross-table');
    expect(body).not.toMatch(forbiddenProductPattern());
  });
 });
--- a/packages/context/src/memory/memory-runtime-assets.test.ts
+++ b/packages/context/src/memory/memory-runtime-assets.test.ts
@ -15,7 +15,8 @@ const expectedSkillHeadings: Record<string, string> = {
  sl_capture: '# Semantic Layer',
 };
 const expectedAdapterSkillHeadings: Record<string, string> = {
-  historic_sql_ingest: '# Historic SQL Ingest',
+  historic_sql_patterns: '# Historic SQL Patterns',
+  historic_sql_table_digest: '# Historic SQL Table Digest',
  live_database_ingest: '# Live Database Ingest',
  looker_ingest: '# Looker Runtime Ingest',
  lookml_ingest: '# LookML to KTX Semantic Layer',