diff --git a/packages/context/skills/historic_sql_patterns/SKILL.md b/packages/context/skills/historic_sql_patterns/SKILL.md new file mode 100644 index 00000000..995bdc2c --- /dev/null +++ b/packages/context/skills/historic_sql_patterns/SKILL.md @@ -0,0 +1,57 @@ +--- +name: historic_sql_patterns +description: Identify recurring cross-table historic-SQL analytical intents and emit typed pattern evidence for deterministic wiki projection. +callers: [memory_agent] +--- + +# Historic SQL Patterns + +Use this skill when the WorkUnit raw file is `patterns-input.json` from the `historic-sql` adapter. + +## Required Workflow + +1. Read the WorkUnit notes first. +2. Call `read_raw_file` for `patterns-input.json`. +3. Identify recurring analytical intents that span at least two tables and have repeated usage signal. +4. Emit one `pattern` evidence object per durable cross-table intent by calling `emit_historic_sql_evidence`. +5. Stop after all pattern evidence has been emitted. + +## Evidence Shape + +Each call to `emit_historic_sql_evidence` must use this shape: + +```json +{ + "kind": "pattern", + "rawPath": "patterns-input.json", + "pattern": { + "slug": "order-lifecycle-analysis", + "title": "Order Lifecycle Analysis", + "narrative": "Analysts compare order statuses with customer segments to understand lifecycle movement.", + "definitionSql": "select o.status, count(*) from public.orders o join public.customers c on c.id = o.customer_id group by o.status", + "tablesInvolved": ["public.orders", "public.customers"], + "slRefs": ["orders", "customers"], + "constituentTemplateIds": ["pg:1", "pg:2"] + } +} +``` + +The `pattern` object must match `patternOutputSchema`; multiple calls together must form `patternsArraySchema`. + +## Pattern Selection Rules + +- Only select patterns that involve two or more tables. +- Prefer templates with `executionsBucket` at least `10-100` and `distinctUsersBucket` above solo usage. 
+- Merge templates into one pattern only when the business intent is the same. +- Use a stable kebab-case slug based on intent, not a template id. +- Set `definitionSql` to the clearest representative SQL from a constituent template. +- Set `slRefs` to source names when the source name is obvious from table names; omit uncertain refs rather than guessing. + +## Boundaries + +- Do not call wiki_write. +- Do not call sl_write_source. +- Do not call sl_edit_source. +- Do not call context_candidate_write. +- Do not create single-table pattern pages. +- Do not copy credentials, tokens, user emails, or unredacted literals into evidence. diff --git a/packages/context/skills/historic_sql_table_digest/SKILL.md b/packages/context/skills/historic_sql_table_digest/SKILL.md new file mode 100644 index 00000000..34e49d27 --- /dev/null +++ b/packages/context/skills/historic_sql_table_digest/SKILL.md @@ -0,0 +1,61 @@ +--- +name: historic_sql_table_digest +description: Convert one changed historic-SQL table usage bucket into typed table usage evidence for deterministic _schema projection. +callers: [memory_agent] +--- + +# Historic SQL Table Digest + +Use this skill when the WorkUnit raw file is one `tables/..json` file from the `historic-sql` adapter. + +## Required Workflow + +1. Read the WorkUnit notes first. +2. Call `read_raw_file` for the single `tables/..json` raw file. +3. Read `manifest.json` only if the table JSON omits the dialect or the WorkUnit notes are unclear. +4. Produce one concise usage narrative for this table from the staged table JSON. +5. Call `emit_historic_sql_evidence` exactly once with `kind: "table_usage"`. +6. Stop after the evidence tool succeeds. 
+ +## Evidence Shape + +Call `emit_historic_sql_evidence` with this shape: + +```json +{ + "kind": "table_usage", + "table": "public.orders", + "rawPath": "tables/public.orders.json", + "usage": { + "narrative": "Orders are repeatedly queried for paid/refunded lifecycle analysis and customer-level rollups.", + "frequencyTier": "high", + "commonFilters": ["status", "created_at"], + "commonGroupBys": ["status"], + "commonJoins": [{ "table": "public.customers", "on": ["customer_id"] }], + "staleSince": null + } +} +``` + +The `usage` object must match `tableUsageOutputSchema`. + +## Interpretation Rules + +- Treat `columnsByClause.where` as common filters. +- Treat `columnsByClause.groupBy` as common group-bys. +- Treat `observedJoins` as common joins. +- Use `stats.executionsBucket`, `stats.distinctUsersBucket`, and `stats.recencyBucket` to choose `frequencyTier`. +- Use `frequencyTier: "high"` only when executions and distinct users are both broad. +- Use `frequencyTier: "mid"` for repeated team usage that is not broad enough for high. +- Use `frequencyTier: "low"` for low-volume but present usage. +- Use `frequencyTier: "unused"` only when the table input explicitly says the table is stale or has no recent templates. +- Keep `narrative` short and concrete. + +## Boundaries + +- Do not call wiki_write. +- Do not call sl_write_source. +- Do not call sl_edit_source. +- Do not call context_candidate_write. +- Do not emit more than one table usage evidence object. +- Do not invent columns, joins, or tables that are absent from the staged JSON. 
diff --git a/packages/context/src/ingest/ingest-runtime-assets.test.ts b/packages/context/src/ingest/ingest-runtime-assets.test.ts index 9af8fcf6..539b74e8 100644 --- a/packages/context/src/ingest/ingest-runtime-assets.test.ts +++ b/packages/context/src/ingest/ingest-runtime-assets.test.ts @@ -14,14 +14,14 @@ const adapterSkillNames = [ 'metabase_ingest', 'metricflow_ingest', 'notion_synthesize', - 'historic_sql_ingest', + 'historic_sql_table_digest', + 'historic_sql_patterns', 'ingest_triage', 'knowledge_capture', 'sl_capture', ] as const; const adapterReconcileSkillNames = [ - 'historic_sql_curator', 'ingest_triage', 'knowledge_capture', 'sl_capture', @@ -67,66 +67,34 @@ describe('ingest runtime assets', () => { await expect(prompts.loadPrompt('skills/light_extraction')).resolves.toContain('# Light Context Extraction'); }); - it('packages historic-SQL WorkUnit skill guidance from KTX assets', async () => { + it('packages historic-SQL table digest guidance from KTX assets', async () => { const registry = new SkillsRegistryService({ skillsDir }); - const skills = await registry.listSkills(['historic_sql_ingest'], 'memory_agent'); + const skills = await registry.listSkills(['historic_sql_table_digest'], 'memory_agent'); - expect(skills.map((skill) => skill.name)).toEqual(['historic_sql_ingest']); + expect(skills.map((skill) => skill.name)).toEqual(['historic_sql_table_digest']); - const [skill] = skills; - if (!skill) { - throw new Error('historic_sql_ingest skill missing'); - } - - expect(skill.path.startsWith(skillsDir)).toBe(true); - - const body = await readFile(join(skill.path, 'SKILL.md'), 'utf-8'); - expect(body).toContain('# Historic SQL Ingest'); - expect(body).toContain('Read exactly one historic-SQL template WorkUnit'); - expect(body).toContain('metadata.json'); - expect(body).toContain('page.md'); - expect(body).toContain('usage.json'); - expect(body).toContain('manifest.json'); - expect(body).toContain('wiki_write'); - expect(body).toContain('key: 
"queries/"'); - expect(body).toContain('"source": "historic-sql"'); - expect(body).toContain('representative_sql'); - expect(body).toContain('fingerprints'); - expect(body).toContain('usage'); - expect(body).toContain('SL proposal threshold'); - expect(body).toContain('Do not group sibling templates'); - expect(body).toContain('Do not copy sample bound_sql'); - expect(body).not.toContain('store historic-SQL provenance in the markdown body'); + const body = await readFile(join(skills[0]!.path, 'SKILL.md'), 'utf-8'); + expect(body).toContain('# Historic SQL Table Digest'); + expect(body).toContain('tables/..json'); + expect(body).toContain('tableUsageOutputSchema'); + expect(body).toContain('emit_historic_sql_evidence'); + expect(body).toContain('Do not call wiki_write'); + expect(body).toContain('Do not call sl_write_source'); expect(body).not.toMatch(forbiddenProductPattern()); }); - it('packages historic-SQL curator reconcile guidance from KTX assets', async () => { + it('packages historic-SQL patterns guidance from KTX assets', async () => { const registry = new SkillsRegistryService({ skillsDir }); - const skills = await registry.listSkills(['historic_sql_curator'], 'memory_agent'); + const skills = await registry.listSkills(['historic_sql_patterns'], 'memory_agent'); - expect(skills.map((skill) => skill.name)).toEqual(['historic_sql_curator']); + expect(skills.map((skill) => skill.name)).toEqual(['historic_sql_patterns']); - const [skill] = skills; - if (!skill) { - throw new Error('historic_sql_curator skill missing'); - } - - expect(skill.path.startsWith(skillsDir)).toBe(true); - - const body = await readFile(join(skill.path, 'SKILL.md'), 'utf-8'); - expect(body).toContain('# Historic SQL Curator'); - expect(body).toContain('curator pagination'); - expect(body).toContain('stage_list'); - expect(body).toContain('stage_diff'); - expect(body).toContain('read_raw_span'); - expect(body).toContain('wiki_search'); - expect(body).toContain('wiki_read'); - 
expect(body).toContain('wiki_write'); - expect(body).toContain('emit_artifact_resolution'); - expect(body).toContain('emit_eviction_decision'); - expect(body).toContain('categorical sub-cluster'); - expect(body).toContain('historic-sql-demoted'); - expect(body).toContain('Do not call `context_candidate_write`'); + const body = await readFile(join(skills[0]!.path, 'SKILL.md'), 'utf-8'); + expect(body).toContain('# Historic SQL Patterns'); + expect(body).toContain('patterns-input.json'); + expect(body).toContain('patternsArraySchema'); + expect(body).toContain('emit_historic_sql_evidence'); + expect(body).toContain('cross-table'); expect(body).not.toMatch(forbiddenProductPattern()); }); }); diff --git a/packages/context/src/memory/memory-runtime-assets.test.ts b/packages/context/src/memory/memory-runtime-assets.test.ts index 204461ec..36d4dc7c 100644 --- a/packages/context/src/memory/memory-runtime-assets.test.ts +++ b/packages/context/src/memory/memory-runtime-assets.test.ts @@ -15,7 +15,8 @@ const expectedSkillHeadings: Record = { sl_capture: '# Semantic Layer', }; const expectedAdapterSkillHeadings: Record = { - historic_sql_ingest: '# Historic SQL Ingest', + historic_sql_patterns: '# Historic SQL Patterns', + historic_sql_table_digest: '# Historic SQL Table Digest', live_database_ingest: '# Live Database Ingest', looker_ingest: '# Looker Runtime Ingest', lookml_ingest: '# LookML to KTX Semantic Layer',