feat: add historic sql evidence skills

This commit is contained in:
Andrey Avtomonov 2026-05-11 18:48:31 +02:00
parent ef9358beb7
commit 954426289d
4 changed files with 141 additions and 54 deletions

View file

@ -0,0 +1,57 @@
---
name: historic_sql_patterns
description: Identify recurring cross-table historic-SQL analytical intents and emit typed pattern evidence for deterministic wiki projection.
callers: [memory_agent]
---
# Historic SQL Patterns
Use this skill when the WorkUnit raw file is `patterns-input.json` from the `historic-sql` adapter.
## Required Workflow
1. Read the WorkUnit notes first.
2. Call `read_raw_file` for `patterns-input.json`.
3. Identify recurring analytical intents that span at least two tables and have repeated usage signal.
4. Emit one `pattern` evidence object per durable cross-table intent by calling `emit_historic_sql_evidence`.
5. Stop after all pattern evidence has been emitted.
## Evidence Shape
Each call to `emit_historic_sql_evidence` must use this shape:
```json
{
"kind": "pattern",
"rawPath": "patterns-input.json",
"pattern": {
"slug": "order-lifecycle-analysis",
"title": "Order Lifecycle Analysis",
"narrative": "Analysts compare order statuses with customer segments to understand lifecycle movement.",
"definitionSql": "select o.status, count(*) from public.orders o join public.customers c on c.id = o.customer_id group by o.status",
"tablesInvolved": ["public.orders", "public.customers"],
"slRefs": ["orders", "customers"],
"constituentTemplateIds": ["pg:1", "pg:2"]
}
}
```
The `pattern` object in each call must match `patternOutputSchema`; the `pattern` objects from all calls, taken together, must form a valid `patternsArraySchema` array.
## Pattern Selection Rules
- Only emit patterns that involve two or more tables; single-table intents are out of scope for this skill.
- Prefer templates whose `executionsBucket` is `10-100` or higher and whose `distinctUsersBucket` indicates more than a single user.
- Merge templates into one pattern only when the business intent is the same.
- Use a stable kebab-case slug based on intent, not a template id.
- Set `definitionSql` to the clearest representative SQL from a constituent template.
- Set `slRefs` to source names when the source name is obvious from table names; omit uncertain refs rather than guessing.
## Boundaries
- Do not call wiki_write.
- Do not call sl_write_source.
- Do not call sl_edit_source.
- Do not call context_candidate_write.
- Do not emit pattern evidence for single-table intents.
- Do not copy credentials, tokens, user emails, or unredacted literals into evidence.

View file

@ -0,0 +1,61 @@
---
name: historic_sql_table_digest
description: Convert one changed historic-SQL table usage bucket into typed table usage evidence for deterministic _schema projection.
callers: [memory_agent]
---
# Historic SQL Table Digest
Use this skill when the WorkUnit raw file is one `tables/<schema>.<name>.json` file from the `historic-sql` adapter.
## Required Workflow
1. Read the WorkUnit notes first.
2. Call `read_raw_file` for the single `tables/<schema>.<name>.json` raw file.
3. Read `manifest.json` only if the table JSON omits the dialect or the WorkUnit notes are unclear.
4. Produce one concise usage narrative for this table from the staged table JSON.
5. Call `emit_historic_sql_evidence` exactly once with `kind: "table_usage"`.
6. Stop after the evidence tool succeeds.
## Evidence Shape
Call `emit_historic_sql_evidence` with this shape:
```json
{
"kind": "table_usage",
"table": "public.orders",
"rawPath": "tables/public.orders.json",
"usage": {
"narrative": "Orders are repeatedly queried for paid/refunded lifecycle analysis and customer-level rollups.",
"frequencyTier": "high",
"commonFilters": ["status", "created_at"],
"commonGroupBys": ["status"],
"commonJoins": [{ "table": "public.customers", "on": ["customer_id"] }],
"staleSince": null
}
}
```
The `usage` object must match `tableUsageOutputSchema`.
## Interpretation Rules
- Treat `columnsByClause.where` as common filters.
- Treat `columnsByClause.groupBy` as common group-bys.
- Treat `observedJoins` as common joins.
- Use `stats.executionsBucket`, `stats.distinctUsersBucket`, and `stats.recencyBucket` to choose `frequencyTier`.
- Use `frequencyTier: "high"` only when executions and distinct users are both broad.
- Use `frequencyTier: "mid"` for repeated team usage that is not broad enough for high.
- Use `frequencyTier: "low"` for low-volume but present usage.
- Use `frequencyTier: "unused"` only when the table input explicitly says the table is stale or has no recent templates.
- Keep `narrative` short and concrete.
## Boundaries
- Do not call wiki_write.
- Do not call sl_write_source.
- Do not call sl_edit_source.
- Do not call context_candidate_write.
- Do not emit more than one table usage evidence object.
- Do not invent columns, joins, or tables that are absent from the staged JSON.

View file

@ -14,14 +14,14 @@ const adapterSkillNames = [
'metabase_ingest',
'metricflow_ingest',
'notion_synthesize',
'historic_sql_ingest',
'historic_sql_table_digest',
'historic_sql_patterns',
'ingest_triage',
'knowledge_capture',
'sl_capture',
] as const;
const adapterReconcileSkillNames = [
'historic_sql_curator',
'ingest_triage',
'knowledge_capture',
'sl_capture',
@ -67,66 +67,34 @@ describe('ingest runtime assets', () => {
await expect(prompts.loadPrompt('skills/light_extraction')).resolves.toContain('# Light Context Extraction');
});
it('packages historic-SQL WorkUnit skill guidance from KTX assets', async () => {
it('packages historic-SQL table digest guidance from KTX assets', async () => {
const registry = new SkillsRegistryService({ skillsDir });
const skills = await registry.listSkills(['historic_sql_ingest'], 'memory_agent');
const skills = await registry.listSkills(['historic_sql_table_digest'], 'memory_agent');
expect(skills.map((skill) => skill.name)).toEqual(['historic_sql_ingest']);
expect(skills.map((skill) => skill.name)).toEqual(['historic_sql_table_digest']);
const [skill] = skills;
if (!skill) {
throw new Error('historic_sql_ingest skill missing');
}
expect(skill.path.startsWith(skillsDir)).toBe(true);
const body = await readFile(join(skill.path, 'SKILL.md'), 'utf-8');
expect(body).toContain('# Historic SQL Ingest');
expect(body).toContain('Read exactly one historic-SQL template WorkUnit');
expect(body).toContain('metadata.json');
expect(body).toContain('page.md');
expect(body).toContain('usage.json');
expect(body).toContain('manifest.json');
expect(body).toContain('wiki_write');
expect(body).toContain('key: "queries/<intent_slug>"');
expect(body).toContain('"source": "historic-sql"');
expect(body).toContain('representative_sql');
expect(body).toContain('fingerprints');
expect(body).toContain('usage');
expect(body).toContain('SL proposal threshold');
expect(body).toContain('Do not group sibling templates');
expect(body).toContain('Do not copy sample bound_sql');
expect(body).not.toContain('store historic-SQL provenance in the markdown body');
const body = await readFile(join(skills[0]!.path, 'SKILL.md'), 'utf-8');
expect(body).toContain('# Historic SQL Table Digest');
expect(body).toContain('tables/<schema>.<name>.json');
expect(body).toContain('tableUsageOutputSchema');
expect(body).toContain('emit_historic_sql_evidence');
expect(body).toContain('Do not call wiki_write');
expect(body).toContain('Do not call sl_write_source');
expect(body).not.toMatch(forbiddenProductPattern());
});
it('packages historic-SQL curator reconcile guidance from KTX assets', async () => {
it('packages historic-SQL patterns guidance from KTX assets', async () => {
const registry = new SkillsRegistryService({ skillsDir });
const skills = await registry.listSkills(['historic_sql_curator'], 'memory_agent');
const skills = await registry.listSkills(['historic_sql_patterns'], 'memory_agent');
expect(skills.map((skill) => skill.name)).toEqual(['historic_sql_curator']);
expect(skills.map((skill) => skill.name)).toEqual(['historic_sql_patterns']);
const [skill] = skills;
if (!skill) {
throw new Error('historic_sql_curator skill missing');
}
expect(skill.path.startsWith(skillsDir)).toBe(true);
const body = await readFile(join(skill.path, 'SKILL.md'), 'utf-8');
expect(body).toContain('# Historic SQL Curator');
expect(body).toContain('curator pagination');
expect(body).toContain('stage_list');
expect(body).toContain('stage_diff');
expect(body).toContain('read_raw_span');
expect(body).toContain('wiki_search');
expect(body).toContain('wiki_read');
expect(body).toContain('wiki_write');
expect(body).toContain('emit_artifact_resolution');
expect(body).toContain('emit_eviction_decision');
expect(body).toContain('categorical sub-cluster');
expect(body).toContain('historic-sql-demoted');
expect(body).toContain('Do not call `context_candidate_write`');
const body = await readFile(join(skills[0]!.path, 'SKILL.md'), 'utf-8');
expect(body).toContain('# Historic SQL Patterns');
expect(body).toContain('patterns-input.json');
expect(body).toContain('patternsArraySchema');
expect(body).toContain('emit_historic_sql_evidence');
expect(body).toContain('cross-table');
expect(body).not.toMatch(forbiddenProductPattern());
});
});

View file

@ -15,7 +15,8 @@ const expectedSkillHeadings: Record<string, string> = {
sl_capture: '# Semantic Layer',
};
const expectedAdapterSkillHeadings: Record<string, string> = {
historic_sql_ingest: '# Historic SQL Ingest',
historic_sql_patterns: '# Historic SQL Patterns',
historic_sql_table_digest: '# Historic SQL Table Digest',
live_database_ingest: '# Live Database Ingest',
looker_ingest: '# Looker Runtime Ingest',
lookml_ingest: '# LookML to KTX Semantic Layer',