mirror of
https://github.com/Kaelio/ktx.git
synced 2026-06-10 08:05:14 +02:00
fix: stabilize ingest wiki references
This commit is contained in:
parent
53c9a1eefa
commit
7e7795859b
29 changed files with 502 additions and 111 deletions
|
|
@ -265,6 +265,18 @@ export class CliLookerSlWritingAgentRunner extends AgentRunnerService {
|
|||
params.telemetryTags?.operationName === 'ingest-bundle-wu' &&
|
||||
params.telemetryTags?.unitKey === 'looker-explore-ecommerce-orders'
|
||||
) {
|
||||
const ledger = params.toolSet.record_verification_ledger;
|
||||
if (!ledger?.execute) {
|
||||
throw new Error('record_verification_ledger tool was not available to the Looker WorkUnit');
|
||||
}
|
||||
await ledger.execute(
|
||||
{
|
||||
summary: 'Test fixture verified Looker explore target identifiers before writing SL.',
|
||||
verifiedIdentifiers: ['prod-warehouse', 'public.orders'],
|
||||
unverifiedIdentifiers: [],
|
||||
},
|
||||
{ toolCallId: 'cli-looker-verification-ledger', messages: [] },
|
||||
);
|
||||
const slWrite = params.toolSet.sl_write_source;
|
||||
if (!slWrite?.execute) {
|
||||
throw new Error('sl_write_source tool was not available to the Looker WorkUnit');
|
||||
|
|
|
|||
|
|
@ -20,6 +20,7 @@ Parsimonious. Stage 3 WUs already loaded `ingest_triage` and handled conflicts t
|
|||
|
||||
<scope>
|
||||
All wiki writes are GLOBAL (same as Stage 3). SL writes target the same session worktree Stage 3 used.
|
||||
Wiki keys must be flat slugs, not directory paths. If a Stage 3 page used a path-like key and a flat retry exists, treat the flat key as the canonical page.
|
||||
</scope>
|
||||
|
||||
<do_not>
|
||||
|
|
|
|||
|
|
@ -19,6 +19,7 @@ Assertive. The bundle was explicitly submitted for ingest. Default to capturing
|
|||
|
||||
<scope>
|
||||
All wiki writes go to the GLOBAL scope. Bundle ingests are not personal. The `wiki_write` tool selects scope automatically for this caller.
|
||||
Wiki keys must be flat slugs like `paid-order-lifecycle`, not directory paths like `historic-sql/paid-order-lifecycle`. Use `tags`, `source`, and page content to group related pages.
|
||||
</scope>
|
||||
|
||||
<do_not>
|
||||
|
|
|
|||
|
|
@ -100,6 +100,10 @@ The `wiki_write` tool accepts three array fields that go into the page frontmatt
|
|||
- **`refs`**: keys of related wiki pages. Add when the new page materially depends on concepts from another (e.g., a churn definition that uses the paid-orders filter from a revenue definition). Don't add refs just because pages share a topic area.
|
||||
- **`sl_refs`**: names of SL sources or measures the page relates to. Format: `"source_name"` or `"source_name.measure_name"`. Discover via `sl_discover` → inspect with `sl_read_source` → include the confirmed matches.
|
||||
|
||||
Wiki page keys must be flat slugs. Use `large-contract-requesters`, not
|
||||
`historic-sql/large-contract-requesters`. Use `tags`, `source`, and content
|
||||
headings for grouping.
|
||||
|
||||
### Replace semantics
|
||||
|
||||
All three fields use REPLACE semantics on update:
|
||||
|
|
|
|||
|
|
@ -277,7 +277,7 @@ describe('historic-SQL local ingest retrieval acceptance', () => {
|
|||
|
||||
await expect(readFile(join(project.projectDir, 'semantic-layer/warehouse/_schema/public.yaml'), 'utf-8')).resolves
|
||||
.toContain('Analysts repeatedly inspect paid order lifecycle by customer segment.');
|
||||
await expect(readFile(join(project.projectDir, 'knowledge/global/historic-sql/paid-order-lifecycle.md'), 'utf-8'))
|
||||
await expect(readFile(join(project.projectDir, 'knowledge/global/historic-sql-paid-order-lifecycle.md'), 'utf-8'))
|
||||
.resolves.toContain('Paid Order Lifecycle');
|
||||
|
||||
const reloaded = await loadKtxProject({ projectDir: project.projectDir });
|
||||
|
|
@ -295,7 +295,7 @@ describe('historic-SQL local ingest retrieval acceptance', () => {
|
|||
searchLocalKnowledgePages(reloaded, { query: 'paid order lifecycle', userId: 'local', limit: 5 }),
|
||||
).resolves.toEqual([
|
||||
expect.objectContaining({
|
||||
key: 'historic-sql/paid-order-lifecycle',
|
||||
key: 'historic-sql-paid-order-lifecycle',
|
||||
summary: 'Paid Order Lifecycle',
|
||||
matchReasons: expect.arrayContaining(['lexical']),
|
||||
}),
|
||||
|
|
|
|||
|
|
@ -10,7 +10,7 @@ async function commitProjectionChanges(workdir: string): Promise<void> {
|
|||
const status = await git.status();
|
||||
const paths = status.files
|
||||
.map((file) => file.path)
|
||||
.filter((path) => path.startsWith('semantic-layer/') || path.startsWith('knowledge/global/historic-sql/'));
|
||||
.filter((path) => path.startsWith('semantic-layer/') || path.startsWith('knowledge/global/historic-sql'));
|
||||
if (paths.length === 0) {
|
||||
return;
|
||||
}
|
||||
|
|
|
|||
|
|
@ -106,7 +106,7 @@ describe('projectHistoricSqlEvidence', () => {
|
|||
await writeJson(workdir, 'raw-sources/warehouse/historic-sql/sync-1/tables/public.customers.json', { table: 'public.customers' });
|
||||
await writeText(
|
||||
workdir,
|
||||
'knowledge/global/historic-sql/old-order-lifecycle.md',
|
||||
'knowledge/global/historic-sql-old-order-lifecycle.md',
|
||||
[
|
||||
'---',
|
||||
YAML.stringify({
|
||||
|
|
@ -127,7 +127,7 @@ describe('projectHistoricSqlEvidence', () => {
|
|||
);
|
||||
await writeText(
|
||||
workdir,
|
||||
'knowledge/global/historic-sql/retired-pattern.md',
|
||||
'knowledge/global/historic-sql-retired-pattern.md',
|
||||
[
|
||||
'---',
|
||||
YAML.stringify({
|
||||
|
|
@ -164,15 +164,15 @@ describe('projectHistoricSqlEvidence', () => {
|
|||
const result = await projectHistoricSqlEvidence({ workdir, connectionId: 'warehouse', syncId: 'sync-1', runId: 'run-1' });
|
||||
|
||||
expect(result.patternPagesWritten).toBe(1);
|
||||
await expect(readFile(join(workdir, 'knowledge/global/historic-sql/old-order-lifecycle.md'), 'utf-8')).resolves.toContain(
|
||||
await expect(readFile(join(workdir, 'knowledge/global/historic-sql-old-order-lifecycle.md'), 'utf-8')).resolves.toContain(
|
||||
'Order Lifecycle Analysis',
|
||||
);
|
||||
await expect(readFile(join(workdir, 'knowledge/global/historic-sql/retired-pattern.md'), 'utf-8')).resolves.toContain(
|
||||
await expect(readFile(join(workdir, 'knowledge/global/historic-sql-retired-pattern.md'), 'utf-8')).resolves.toContain(
|
||||
'stale_since: "2026-05-11T00:00:00.000Z"',
|
||||
);
|
||||
});
|
||||
|
||||
it('writes a reappearing pattern to the active slug instead of reusing an archived page key', async () => {
|
||||
it('rewrites a reappearing archived pattern at the flat slug', async () => {
|
||||
const workdir = await tempWorkdir();
|
||||
await writeJson(workdir, 'raw-sources/warehouse/historic-sql/sync-1/manifest.json', {
|
||||
source: 'historic-sql',
|
||||
|
|
@ -192,7 +192,7 @@ describe('projectHistoricSqlEvidence', () => {
|
|||
await writeJson(workdir, 'raw-sources/warehouse/historic-sql/sync-1/tables/public.customers.json', { table: 'public.customers' });
|
||||
await writeText(
|
||||
workdir,
|
||||
'knowledge/global/historic-sql/_archived/order-lifecycle-analysis.md',
|
||||
'knowledge/global/historic-sql-order-lifecycle-analysis.md',
|
||||
[
|
||||
'---',
|
||||
YAML.stringify({
|
||||
|
|
@ -230,15 +230,10 @@ describe('projectHistoricSqlEvidence', () => {
|
|||
const result = await projectHistoricSqlEvidence({ workdir, connectionId: 'warehouse', syncId: 'sync-1', runId: 'run-1' });
|
||||
|
||||
expect(result.patternPagesWritten).toBe(1);
|
||||
await expect(readFile(join(workdir, 'knowledge/global/historic-sql/order-lifecycle-analysis.md'), 'utf-8')).resolves.toContain(
|
||||
'Order Lifecycle Analysis',
|
||||
);
|
||||
await expect(readFile(join(workdir, 'knowledge/global/historic-sql/_archived/order-lifecycle-analysis.md'), 'utf-8')).resolves.toContain(
|
||||
'Archived body',
|
||||
);
|
||||
await expect(
|
||||
readFile(join(workdir, 'knowledge/global/historic-sql/_archived/_archived/order-lifecycle-analysis.md'), 'utf-8'),
|
||||
).rejects.toMatchObject({ code: 'ENOENT' });
|
||||
const page = await readFile(join(workdir, 'knowledge/global/historic-sql-order-lifecycle-analysis.md'), 'utf-8');
|
||||
expect(page).toContain('Analysts compare order status with customer segment again.');
|
||||
expect(page).not.toContain('Archived body');
|
||||
expect(page).not.toContain('archived');
|
||||
});
|
||||
|
||||
it('leaves already archived pattern pages stable when they are still absent', async () => {
|
||||
|
|
@ -259,7 +254,7 @@ describe('projectHistoricSqlEvidence', () => {
|
|||
});
|
||||
await writeText(
|
||||
workdir,
|
||||
'knowledge/global/historic-sql/_archived/retired-pattern.md',
|
||||
'knowledge/global/historic-sql-retired-pattern.md',
|
||||
[
|
||||
'---',
|
||||
YAML.stringify({
|
||||
|
|
@ -284,12 +279,9 @@ describe('projectHistoricSqlEvidence', () => {
|
|||
|
||||
expect(result.archivedPatternPages).toBe(0);
|
||||
expect(result.stalePatternPagesMarked).toBe(0);
|
||||
await expect(readFile(join(workdir, 'knowledge/global/historic-sql/_archived/retired-pattern.md'), 'utf-8')).resolves.toContain(
|
||||
await expect(readFile(join(workdir, 'knowledge/global/historic-sql-retired-pattern.md'), 'utf-8')).resolves.toContain(
|
||||
'Archived retired body',
|
||||
);
|
||||
await expect(readFile(join(workdir, 'knowledge/global/historic-sql/_archived/_archived/retired-pattern.md'), 'utf-8')).rejects.toMatchObject({
|
||||
code: 'ENOENT',
|
||||
});
|
||||
});
|
||||
|
||||
it('marks missing table usage stale and deletes legacy historic SQL query pages', async () => {
|
||||
|
|
@ -330,7 +322,7 @@ describe('projectHistoricSqlEvidence', () => {
|
|||
});
|
||||
await writeText(
|
||||
workdir,
|
||||
'knowledge/global/historic-sql/legacy-template.md',
|
||||
'knowledge/global/historic-sql-legacy-template.md',
|
||||
[
|
||||
'---',
|
||||
YAML.stringify({
|
||||
|
|
@ -365,7 +357,7 @@ describe('projectHistoricSqlEvidence', () => {
|
|||
commonJoins: [],
|
||||
staleSince: '2026-05-11T00:00:00.000Z',
|
||||
});
|
||||
await expect(readFile(join(workdir, 'knowledge/global/historic-sql/legacy-template.md'), 'utf-8')).rejects.toMatchObject({
|
||||
await expect(readFile(join(workdir, 'knowledge/global/historic-sql-legacy-template.md'), 'utf-8')).rejects.toMatchObject({
|
||||
code: 'ENOENT',
|
||||
});
|
||||
});
|
||||
|
|
|
|||
|
|
@ -37,7 +37,7 @@ interface HistoricSqlPatternPage {
|
|||
}
|
||||
|
||||
/**
 * Normalises an arbitrary label into a flat wiki slug.
 *
 * The value is lower-cased, every run of characters outside `a-z`, `0-9`,
 * `_` and `-` is collapsed into a single `-`, and leading/trailing dashes are
 * trimmed. `/` is deliberately NOT allowed through: wiki keys must be flat
 * slugs, so a path-like input such as `historic-sql/foo` becomes
 * `historic-sql-foo` instead of producing a nested file path.
 *
 * @param value Raw label or slug candidate.
 * @returns The sanitised slug; may be empty when `value` has no usable characters.
 */
function safeKnowledgeSlug(value: string): string {
  return value
    .toLowerCase()
    .replace(/[^a-z0-9_-]+/g, '-')
    .replace(/^-+|-+$/g, '');
}
|
||||
|
||||
async function pathExists(path: string): Promise<boolean> {
|
||||
|
|
@ -159,7 +159,7 @@ function isLegacyQueryPage(page: HistoricSqlPatternPage): boolean {
|
|||
|
||||
/**
 * Reports whether a historic-SQL pattern page is archived.
 *
 * Archival is signalled solely by an `archived` entry in the page's
 * frontmatter `tags`. Page keys are flat slugs, so there is no `_archived/`
 * key prefix to inspect. A missing or non-array `tags` value is treated as
 * "no tags".
 *
 * @param page Parsed pattern page.
 * @returns `true` when the page carries the `archived` tag.
 */
function isArchivedPatternPage(page: HistoricSqlPatternPage): boolean {
  const tags = Array.isArray(page.frontmatter.tags) ? page.frontmatter.tags : [];
  return tags.includes('archived');
}
|
||||
|
||||
function stringArray(value: unknown): string[] {
|
||||
|
|
@ -191,6 +191,9 @@ async function loadPatternPages(root: string): Promise<HistoricSqlPatternPage[]>
|
|||
const files = await walkFiles(root);
|
||||
const pages: HistoricSqlPatternPage[] = [];
|
||||
for (const file of files.filter((candidate) => candidate.endsWith('.md'))) {
|
||||
if (file.includes('/')) {
|
||||
continue;
|
||||
}
|
||||
const key = file.replace(/\.md$/, '');
|
||||
const path = join(root, file);
|
||||
const page = parseMarkdownPage(key, path, await readFile(path, 'utf-8'));
|
||||
|
|
@ -201,6 +204,10 @@ async function loadPatternPages(root: string): Promise<HistoricSqlPatternPage[]>
|
|||
return pages;
|
||||
}
|
||||
|
||||
/**
 * Builds the flat wiki key for a historic-SQL pattern: the fixed
 * `historic-sql-` prefix followed by the sanitised pattern slug.
 *
 * @param slug Pattern slug as supplied by the pattern evidence.
 * @returns The prefixed flat key.
 */
function historicSqlFlatKey(slug: string): string {
  const sanitized = safeKnowledgeSlug(slug);
  return 'historic-sql-' + sanitized;
}
|
||||
|
||||
async function currentStagedTables(rawDir: string): Promise<Set<string>> {
|
||||
const tablesRoot = join(rawDir, 'tables');
|
||||
const files = await walkFiles(tablesRoot);
|
||||
|
|
@ -276,7 +283,7 @@ export async function projectHistoricSqlEvidence(input: HistoricSqlProjectionInp
|
|||
}
|
||||
}
|
||||
|
||||
const wikiRoot = join(input.workdir, 'knowledge/global/historic-sql');
|
||||
const wikiRoot = join(input.workdir, 'knowledge/global');
|
||||
await mkdir(wikiRoot, { recursive: true });
|
||||
const allPages = await loadPatternPages(wikiRoot);
|
||||
const activePages = allPages.filter((page) => !isArchivedPatternPage(page));
|
||||
|
|
@ -286,7 +293,7 @@ export async function projectHistoricSqlEvidence(input: HistoricSqlProjectionInp
|
|||
for (const pattern of patternEvidence) {
|
||||
const incomingSignals = [...pattern.pattern.tablesInvolved, ...pattern.pattern.constituentTemplateIds];
|
||||
const reusable = patternPages.find((page) => overlapRatio(incomingSignals, existingPageSignals(page)) >= 0.6);
|
||||
const key = reusable?.key ?? safeKnowledgeSlug(pattern.pattern.slug);
|
||||
const key = reusable?.key ?? historicSqlFlatKey(pattern.pattern.slug);
|
||||
const pagePath = join(wikiRoot, `${key}.md`);
|
||||
const frontmatter = {
|
||||
summary: pattern.pattern.title,
|
||||
|
|
@ -308,11 +315,12 @@ export async function projectHistoricSqlEvidence(input: HistoricSqlProjectionInp
|
|||
for (const page of patternPages) {
|
||||
if (writtenKeys.has(page.key)) continue;
|
||||
if (shouldArchive(page.frontmatter.stale_since, manifest.fetchedAt, manifest.staleArchiveAfterDays)) {
|
||||
const archivePath = join(wikiRoot, '_archived', `${page.key}.md`);
|
||||
const tags = [...new Set([...stringArray(page.frontmatter.tags), 'archived'])];
|
||||
await mkdir(dirname(archivePath), { recursive: true });
|
||||
await writeFile(archivePath, renderMarkdownPage({ ...page.frontmatter, tags }, page.content), 'utf-8');
|
||||
await rm(page.path, { force: true });
|
||||
await writeFile(
|
||||
page.path,
|
||||
renderMarkdownPage({ ...page.frontmatter, tags, archived_since: manifest.fetchedAt }, page.content),
|
||||
'utf-8',
|
||||
);
|
||||
result.archivedPatternPages += 1;
|
||||
continue;
|
||||
}
|
||||
|
|
|
|||
|
|
@ -53,6 +53,7 @@ import type {
|
|||
UnresolvedCardInfo,
|
||||
WorkUnit,
|
||||
} from './types.js';
|
||||
import { repairWikiSlRefs, type WikiSlRefRepairResult } from './wiki-sl-ref-repair.js';
|
||||
|
||||
function workUnitToMemoryFlowPlannedWorkUnit(workUnit: WorkUnit): MemoryFlowPlannedWorkUnit {
|
||||
return {
|
||||
|
|
@ -528,6 +529,7 @@ export class IngestBundleRunner {
|
|||
let sourceContextReport: { capped?: boolean; warnings?: string[] } | undefined;
|
||||
let parseArtifacts: unknown;
|
||||
let postProcessorOutcome: IngestReportPostProcessorOutcome | undefined;
|
||||
let wikiSlRefRepairResult: WikiSlRefRepairResult | null = null;
|
||||
let reconcileNotes: string[] = [];
|
||||
let triageResult: PageTriageRunResult | null = null;
|
||||
if (overrideReport) {
|
||||
|
|
@ -1140,6 +1142,19 @@ export class IngestBundleRunner {
|
|||
}
|
||||
}
|
||||
|
||||
const repairConnectionIds = [
|
||||
...new Set([
|
||||
...slConnectionIds,
|
||||
...(postProcessorOutcome?.touchedSources ?? []).map((source) => source.connectionId),
|
||||
]),
|
||||
].sort();
|
||||
wikiSlRefRepairResult = await repairWikiSlRefs({
|
||||
wikiService: this.deps.wikiService.forWorktree(sessionWorktree.workdir),
|
||||
semanticLayerService: this.deps.semanticLayerService.forWorktree(sessionWorktree.workdir),
|
||||
configService: sessionWorktree.config,
|
||||
connectionIds: repairConnectionIds,
|
||||
});
|
||||
|
||||
// Stage 6 — squash commit
|
||||
const stage6 = ctx?.startPhase(0.04);
|
||||
await stage6?.updateProgress(0.0, 'Saving changes');
|
||||
|
|
@ -1356,6 +1371,8 @@ export class IngestBundleRunner {
|
|||
provenanceRows: reportProvenanceRows,
|
||||
toolTranscripts: reportToolTranscripts,
|
||||
postProcessor: postProcessorOutcome,
|
||||
wikiSlRefRepairs: wikiSlRefRepairResult.repairs,
|
||||
wikiSlRefRepairWarnings: wikiSlRefRepairResult.warnings,
|
||||
...(reportMemoryFlow ? { memoryFlow: reportMemoryFlow } : {}),
|
||||
context: contextReport
|
||||
? {
|
||||
|
|
|
|||
|
|
@ -27,6 +27,18 @@ class LookerSlWritingAgentRunner extends AgentRunnerService {
|
|||
params.telemetryTags?.operationName === 'ingest-bundle-wu' &&
|
||||
params.telemetryTags?.unitKey === 'looker-explore-ecommerce-orders'
|
||||
) {
|
||||
const ledger = params.toolSet.record_verification_ledger;
|
||||
if (!ledger?.execute) {
|
||||
throw new Error('record_verification_ledger tool was not available to the Looker WorkUnit');
|
||||
}
|
||||
await ledger.execute(
|
||||
{
|
||||
summary: 'Test fixture verified Looker explore target identifiers before writing SL.',
|
||||
verifiedIdentifiers: ['prod-warehouse', 'public.orders'],
|
||||
unverifiedIdentifiers: [],
|
||||
},
|
||||
{ toolCallId: 'looker-verification-ledger', messages: [] },
|
||||
);
|
||||
const slWrite = params.toolSet.sl_write_source;
|
||||
if (!slWrite?.execute) {
|
||||
throw new Error('sl_write_source tool was not available to the Looker WorkUnit');
|
||||
|
|
@ -63,6 +75,18 @@ class LookerSlWritingAgentRunner extends AgentRunnerService {
|
|||
class WikiWritingAgentRunner extends AgentRunnerService {
|
||||
override runLoop = vi.fn(async (params: any) => {
|
||||
if (params.telemetryTags?.operationName === 'ingest-bundle-wu') {
|
||||
const ledger = params.toolSet.record_verification_ledger;
|
||||
if (!ledger?.execute) {
|
||||
throw new Error('record_verification_ledger tool was not available to the WorkUnit');
|
||||
}
|
||||
await ledger.execute(
|
||||
{
|
||||
summary: 'Test fixture writes wiki-only context with no warehouse identifiers.',
|
||||
verifiedIdentifiers: [],
|
||||
unverifiedIdentifiers: [],
|
||||
},
|
||||
{ toolCallId: 'wiki-verification-ledger', messages: [] },
|
||||
);
|
||||
const wikiWrite = params.toolSet.wiki_write;
|
||||
if (!wikiWrite?.execute) {
|
||||
throw new Error('wiki_write tool was not available to the WorkUnit');
|
||||
|
|
@ -91,6 +115,18 @@ class WikiWritingAgentRunner extends AgentRunnerService {
|
|||
class WikiWritingWithRawPathAgentRunner extends AgentRunnerService {
|
||||
override runLoop = vi.fn(async (params: any) => {
|
||||
if (params.telemetryTags?.operationName === 'ingest-bundle-wu') {
|
||||
const ledger = params.toolSet.record_verification_ledger;
|
||||
if (!ledger?.execute) {
|
||||
throw new Error('record_verification_ledger tool was not available to the WorkUnit');
|
||||
}
|
||||
await ledger.execute(
|
||||
{
|
||||
summary: 'Test fixture writes wiki-only context with explicit raw provenance and no warehouse identifiers.',
|
||||
verifiedIdentifiers: [],
|
||||
unverifiedIdentifiers: [],
|
||||
},
|
||||
{ toolCallId: 'wiki-raw-path-verification-ledger', messages: [] },
|
||||
);
|
||||
const wikiWrite = params.toolSet.wiki_write;
|
||||
if (!wikiWrite?.execute) {
|
||||
throw new Error('wiki_write tool was not available to the WorkUnit');
|
||||
|
|
|
|||
|
|
@ -56,6 +56,7 @@ import {
|
|||
buildKnowledgeSearchText,
|
||||
type KnowledgeEventPort,
|
||||
type KnowledgeIndexPort,
|
||||
type KnowledgeIndexPageListing,
|
||||
KnowledgeWikiService,
|
||||
searchLocalKnowledgePages,
|
||||
SqliteKnowledgeIndex,
|
||||
|
|
@ -351,15 +352,19 @@ class LocalKnowledgeIndex implements KnowledgeIndexPort {
|
|||
|
||||
async listPagesForUser(
|
||||
userId: string,
|
||||
): Promise<Array<{ page_key: string; summary: string; scope: string; scope_id: string | null }>> {
|
||||
const pages: Array<{ page_key: string; summary: string; scope: string; scope_id: string | null }> = [];
|
||||
): Promise<KnowledgeIndexPageListing[]> {
|
||||
const pages: KnowledgeIndexPageListing[] = [];
|
||||
for (const scope of [
|
||||
{ scope: 'GLOBAL', scopeId: null, dir: 'knowledge/global' },
|
||||
{ scope: 'USER', scopeId: userId, dir: `knowledge/user/${userId}` },
|
||||
]) {
|
||||
const listed = await this.project.fileStore.listFiles(scope.dir, true);
|
||||
for (const file of listed.files.filter((entry) => entry.endsWith('.md'))) {
|
||||
const pageKey = file.replace(/\.md$/, '');
|
||||
const parsedPath = parseKnowledgeIndexPath(file.startsWith('global/') || file.startsWith('user/') ? file : `${scope.dir.replace('knowledge/', '')}/${file}`);
|
||||
if (!parsedPath || parsedPath.scope !== scope.scope) {
|
||||
continue;
|
||||
}
|
||||
const pageKey = parsedPath.pageKey;
|
||||
const raw = await this.project.fileStore.readFile(`${scope.dir}/${file}`);
|
||||
const parsed = parseWiki(raw.content);
|
||||
pages.push({
|
||||
|
|
@ -367,6 +372,7 @@ class LocalKnowledgeIndex implements KnowledgeIndexPort {
|
|||
summary: parsed.summary,
|
||||
scope: scope.scope,
|
||||
scope_id: scope.scopeId,
|
||||
tags: parseWikiTags(raw.content),
|
||||
});
|
||||
}
|
||||
}
|
||||
|
|
@ -436,13 +442,6 @@ function parseKnowledgeIndexPath(file: string): { scope: 'GLOBAL' | 'USER'; page
|
|||
const pageKey = segments[1].replace(/\.md$/, '');
|
||||
return /^[a-zA-Z0-9][a-zA-Z0-9_-]*$/.test(pageKey) ? { scope: 'GLOBAL', pageKey } : null;
|
||||
}
|
||||
if (segments.length >= 3 && segments[0] === 'global' && segments[1] === 'historic-sql') {
|
||||
const historicPath = segments.slice(2).join('/').replace(/\.md$/, '');
|
||||
if (historicPath.split('/').every((segment) => /^[a-zA-Z0-9_][a-zA-Z0-9_-]*$/.test(segment))) {
|
||||
return { scope: 'GLOBAL', pageKey: `historic-sql/${historicPath}` };
|
||||
}
|
||||
return null;
|
||||
}
|
||||
if (segments.length === 3 && segments[0] === 'user') {
|
||||
const pageKey = segments[2].replace(/\.md$/, '');
|
||||
return /^[a-zA-Z0-9][a-zA-Z0-9_-]*$/.test(pageKey) ? { scope: 'USER', pageKey } : null;
|
||||
|
|
@ -521,7 +520,7 @@ class LocalIngestToolsetFactory implements IngestToolsetFactoryPort {
|
|||
this.baseTools = [
|
||||
new WikiReadTool(deps.wikiService, deps.knowledgeIndex),
|
||||
wikiSearchTool,
|
||||
new WikiListTagsTool(deps.wikiService, deps.knowledgeIndex),
|
||||
new WikiListTagsTool(deps.knowledgeIndex),
|
||||
new WikiWriteTool(deps.wikiService, deps.knowledgeIndex, deps.knowledgeEvents),
|
||||
new WikiRemoveTool(deps.wikiService, deps.knowledgeIndex, deps.knowledgeEvents),
|
||||
slDiscoverTool,
|
||||
|
|
|
|||
|
|
@ -9,6 +9,7 @@ import type {
|
|||
StageIndex,
|
||||
UnmappedFallbackRecord,
|
||||
} from './stages/stage-index.types.js';
|
||||
import type { WikiSlRefRepair } from './wiki-sl-ref-repair.js';
|
||||
import type { IngestDiffSummary, SourceFetchReport, UnresolvedCardInfo } from './types.js';
|
||||
|
||||
export interface IngestReportWorkUnit {
|
||||
|
|
@ -70,6 +71,8 @@ export interface IngestReportBody {
|
|||
provenanceRows: IngestReportProvenanceDetail[];
|
||||
toolTranscripts: IngestReportToolTranscriptSummary[];
|
||||
postProcessor?: IngestReportPostProcessorOutcome;
|
||||
wikiSlRefRepairs?: WikiSlRefRepair[];
|
||||
wikiSlRefRepairWarnings?: string[];
|
||||
memoryFlow?: MemoryFlowReplayInput;
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -36,6 +36,28 @@ describe('tool transcript summaries', () => {
|
|||
expect(summary.fatalErrorCount).toBe(0);
|
||||
});
|
||||
|
||||
it('treats a suggested flat wiki key retry as recovery for an invalid nested key', () => {
  // The first attempt uses a path-like key and fails; the retry uses the flat
  // slug for the same page and succeeds, so the failure must not stay fatal.
  const attempts = [
    { key: 'historic-sql/top-accounts-by-contract-arr', success: false },
    { key: 'historic-sql-top-accounts-by-contract-arr', success: true },
  ];
  const summary = createMutableToolTranscriptSummary('wu-1', '/tmp/wu-1.jsonl');

  for (const { key, success } of attempts) {
    recordToolTranscriptEntry(
      summary,
      entry({
        input: { key },
        output: { structured: { success, key } },
      }),
    );
  }

  expect(summary.errorCount).toBe(1);
  expect(summary.fatalErrorCount).toBe(0);
});
|
||||
|
||||
it('counts unrecovered wiki_remove structured failures as fatal transcript errors', () => {
|
||||
const summary = createMutableToolTranscriptSummary('reconcile', '/tmp/reconcile.jsonl');
|
||||
|
||||
|
|
|
|||
|
|
@ -1,4 +1,5 @@
|
|||
import type { ToolCallLogEntry } from './tool-call-logger.js';
|
||||
import { isFlatWikiKey, suggestFlatWikiKey } from '../../wiki/keys.js';
|
||||
|
||||
export interface MutableToolTranscriptSummary {
|
||||
unitKey: string;
|
||||
|
|
@ -112,7 +113,10 @@ function structuredSuccess(output: unknown): boolean | null {
|
|||
|
||||
/**
 * Derives the transcript target identifier for a wiki tool call.
 *
 * The key is taken from the structured tool output when present, otherwise
 * from the tool input. Path-like keys are mapped to their suggested flat
 * slug so that a failed call with a nested key and its retry with the flat
 * key resolve to the same target.
 *
 * @param entry Logged tool call.
 * @returns `wiki:<flat key>`, or `null` when the call carries no key.
 */
function wikiTargetKey(entry: ToolCallLogEntry): string | null {
  const key = stringField(recordField(entry.output, 'structured'), 'key') ?? stringField(entry.input, 'key');
  if (!key) {
    return null;
  }
  const flatKey = isFlatWikiKey(key) ? key : suggestFlatWikiKey(key);
  return `wiki:${flatKey}`;
}
|
||||
|
||||
function slTargetKey(entry: ToolCallLogEntry): string | null {
|
||||
|
|
|
|||
99
packages/context/src/ingest/wiki-sl-ref-repair.test.ts
Normal file
99
packages/context/src/ingest/wiki-sl-ref-repair.test.ts
Normal file
|
|
@ -0,0 +1,99 @@
|
|||
import { describe, expect, it, vi } from 'vitest';
|
||||
import { repairWikiSlRefs } from './wiki-sl-ref-repair.js';
|
||||
|
||||
describe('repairWikiSlRefs', () => {
  it('removes missing measure refs while keeping source, measure, segment, and manifest-backed refs', async () => {
    type TestPage = { pageKey: string; frontmatter: Record<string, unknown>; content: string };

    // In-memory wiki: one flat GLOBAL page whose sl_refs mix valid entries with
    // a single measure that no loaded source defines.
    const pages = new Map<string, TestPage>([
      [
        'GLOBAL:accounts-at-risk',
        {
          pageKey: 'accounts-at-risk',
          frontmatter: {
            summary: 'Accounts at risk',
            usage_mode: 'auto',
            sl_refs: [
              'mart_customer_health',
              'mart_customer_health.high_risk_account_count',
              'mart_customer_health.medium_risk_account_count',
              'mart_customer_health.high_risk',
              'int_procurement_qualifying_actions',
            ],
          },
          content: 'Risk context.',
        },
      ],
    ]);
    const wikiService = {
      readPage: vi.fn(async (scope: string, _scopeId: string | null, key: string) => pages.get(`${scope}:${key}`)),
      writePage: vi.fn(
        async (
          scope: string,
          _scopeId: string | null,
          key: string,
          frontmatter: Record<string, unknown>,
          content: string,
        ) => {
          pages.set(`${scope}:${key}`, { pageKey: key, frontmatter, content });
        },
      ),
    };
    // The listing also contains a nested legacy path, which must be ignored.
    const configService = {
      listFiles: vi.fn(async () => ({
        files: ['global/accounts-at-risk.md', 'global/historic-sql/nested-legacy.md'],
      })),
    };
    const semanticLayerService = {
      loadAllSources: vi.fn(async () => [
        {
          name: 'mart_customer_health',
          grain: [],
          columns: [],
          joins: [],
          measures: [{ name: 'high_risk_account_count', expr: 'count(*)' }],
          segments: [{ name: 'high_risk', expr: "risk_level = 'high'" }],
        },
        {
          name: 'int_procurement_qualifying_actions',
          grain: [],
          columns: [],
          joins: [],
          measures: [],
        },
      ]),
    };

    const result = await repairWikiSlRefs({
      wikiService: wikiService as never,
      semanticLayerService: semanticLayerService as never,
      configService: configService as never,
      connectionIds: ['warehouse'],
    });

    expect(result.repairs).toEqual([
      {
        pageKey: 'accounts-at-risk',
        scope: 'GLOBAL',
        scopeId: null,
        removedRefs: ['mart_customer_health.medium_risk_account_count'],
      },
    ]);
    expect(result.warnings).toEqual([
      'Removed invalid sl_refs from accounts-at-risk: mart_customer_health.medium_risk_account_count',
    ]);

    // Only the flat page is read; the nested legacy path never reaches the wiki service.
    expect(wikiService.readPage).toHaveBeenCalledTimes(1);
    expect(wikiService.readPage).toHaveBeenCalledWith('GLOBAL', null, 'accounts-at-risk');

    expect(wikiService.writePage).toHaveBeenCalledTimes(1);
    expect(wikiService.writePage).toHaveBeenCalledWith(
      'GLOBAL',
      null,
      'accounts-at-risk',
      expect.objectContaining({
        sl_refs: [
          'mart_customer_health',
          'mart_customer_health.high_risk_account_count',
          'mart_customer_health.high_risk',
          'int_procurement_qualifying_actions',
        ],
      }),
      'Risk context.',
      'System User',
      'system@example.com',
      'Repair semantic-layer refs: accounts-at-risk',
    );
  });
});
|
||||
140
packages/context/src/ingest/wiki-sl-ref-repair.ts
Normal file
140
packages/context/src/ingest/wiki-sl-ref-repair.ts
Normal file
|
|
@ -0,0 +1,140 @@
|
|||
import type { KtxFileStorePort } from '../core/index.js';
|
||||
import type { SemanticLayerService, SemanticLayerSource } from '../sl/index.js';
|
||||
import { isFlatWikiKey } from '../wiki/keys.js';
|
||||
import type { KnowledgeWikiService, WikiFrontmatter } from '../wiki/index.js';
|
||||
|
||||
const SYSTEM_AUTHOR = 'System User';
|
||||
const SYSTEM_EMAIL = 'system@example.com';
|
||||
|
||||
/** One wiki page whose `sl_refs` frontmatter was rewritten by the repair pass. */
export interface WikiSlRefRepair {
  /** Flat key of the repaired page. */
  pageKey: string;
  /** Scope the page was read from and written back to. */
  scope: 'GLOBAL' | 'USER';
  /** Owning user directory for `USER` pages; `null` for `GLOBAL` pages. */
  scopeId: string | null;
  /** Refs dropped because they were not among the loaded semantic-layer refs. */
  removedRefs: string[];
}

/** Outcome of a repair pass over the wiki. */
export interface WikiSlRefRepairResult {
  /** One entry per page that was rewritten. */
  repairs: WikiSlRefRepair[];
  /** Human-readable notes: source-load failures, skips, and one line per repaired page. */
  warnings: string[];
}

/** Location of a wiki page decoded from a file path under the knowledge directory. */
interface WikiPath {
  scope: 'GLOBAL' | 'USER';
  scopeId: string | null;
  pageKey: string;
}
|
||||
|
||||
/**
 * Decodes a file path (relative to the knowledge directory) into a wiki page
 * location.
 *
 * Only two layouts are recognised: `global/<key>.md` and
 * `user/<scopeId>/<key>.md`. Anything else — non-Markdown files, deeper
 * nesting, or a key that is not a flat wiki key — yields `null`.
 *
 * @param path Path such as `global/accounts-at-risk.md`.
 * @returns The decoded location, or `null` when the path is not a flat wiki page.
 */
function parseKnowledgeFilePath(path: string): WikiPath | null {
  if (!path.endsWith('.md')) {
    return null;
  }

  const segments = path.split('/');
  let location: Omit<WikiPath, 'pageKey'> | null = null;
  if (segments[0] === 'global' && segments.length === 2) {
    location = { scope: 'GLOBAL', scopeId: null };
  } else if (segments[0] === 'user' && segments.length === 3) {
    location = { scope: 'USER', scopeId: segments[1] };
  }
  if (!location) {
    return null;
  }

  // In both accepted layouts the page file is the final segment.
  const pageKey = segments[segments.length - 1].replace(/\.md$/, '');
  return isFlatWikiKey(pageKey) ? { ...location, pageKey } : null;
}
|
||||
|
||||
/**
 * Lists every ref string a wiki page may use to point at a source: the bare
 * source name, then `source.measure` for each measure, then `source.segment`
 * for each segment. Missing `measures` / `segments` are treated as empty.
 *
 * @param source Semantic-layer source definition.
 * @returns Ref strings in source, measures, segments order.
 */
function entityRefsForSource(source: SemanticLayerSource): string[] {
  const refs = [source.name];
  for (const measure of source.measures ?? []) {
    refs.push(`${source.name}.${measure.name}`);
  }
  for (const segment of source.segments ?? []) {
    refs.push(`${source.name}.${segment.name}`);
  }
  return refs;
}
|
||||
|
||||
/**
 * Collects the ref strings of every source reachable through the given
 * connections.
 *
 * Connections are loaded one after another. A connection whose sources
 * cannot be loaded contributes a warning instead of aborting the whole pass.
 *
 * @param semanticLayerService Service used to load sources per connection.
 * @param connectionIds Connections to inspect.
 * @returns The collected refs and one warning per failed connection.
 */
async function loadVisibleSlRefs(
  semanticLayerService: SemanticLayerService,
  connectionIds: string[],
): Promise<{ refs: Set<string>; warnings: string[] }> {
  const collected = new Set<string>();
  const problems: string[] = [];

  for (const connectionId of connectionIds) {
    try {
      const sources = await semanticLayerService.loadAllSources(connectionId);
      for (const source of sources) {
        entityRefsForSource(source).forEach((ref) => collected.add(ref));
      }
    } catch (error) {
      const reason = error instanceof Error ? error.message : String(error);
      problems.push(`Skipped wiki sl_refs repair for connection ${connectionId}: ${reason}`);
    }
  }

  return { refs: collected, warnings: problems };
}
|
||||
|
||||
/**
 * Extracts the distinct, non-blank strings from a frontmatter value.
 *
 * The value comes from parsed page frontmatter, so it is not guaranteed to be
 * an array: a scalar such as `sl_refs: some_source` or a `null` must not crash
 * the caller. Anything that is not an array yields an empty list; non-string
 * and whitespace-only entries are dropped. First-occurrence order is kept.
 *
 * @param value Raw frontmatter value (expected to be `string[]` or absent).
 * @returns De-duplicated list of usable strings.
 */
function uniqueStringArray(value: unknown): string[] {
  if (!Array.isArray(value)) {
    return [];
  }
  const strings = value.filter(
    (entry): entry is string => typeof entry === 'string' && entry.trim().length > 0,
  );
  return [...new Set(strings)];
}
|
||||
|
||||
/**
 * Removes dangling `sl_refs` from flat wiki pages.
 *
 * The set of valid refs is built from the semantic-layer sources of
 * `connectionIds`. Every flat wiki page under `knowledge` is then read, and
 * any `sl_refs` entry not in that set is dropped and the page rewritten under
 * the system author.
 *
 * The pass is destructive, so it only runs when the valid set is trustworthy:
 * - with no connection ids there is nothing to validate against, so nothing
 *   is touched;
 * - if any connection failed to load, its refs are absent from the valid set
 *   and would wrongly look dangling, so the whole pass is skipped with a
 *   warning.
 *
 * @param input.wikiService Wiki service used to read and rewrite pages.
 * @param input.semanticLayerService Service used to load sources per connection.
 * @param input.configService File store used to list files under `knowledge`.
 * @param input.connectionIds Connections whose sources define the valid refs.
 * @returns The pages that were rewritten plus human-readable warnings.
 */
export async function repairWikiSlRefs(input: {
  wikiService: KnowledgeWikiService;
  semanticLayerService: SemanticLayerService;
  configService: KtxFileStorePort;
  connectionIds: string[];
}): Promise<WikiSlRefRepairResult> {
  if (input.connectionIds.length === 0) {
    // No sources to compare against: treating every ref as invalid would wipe
    // sl_refs from the entire wiki.
    return { repairs: [], warnings: [] };
  }

  const { refs: validRefs, warnings } = await loadVisibleSlRefs(input.semanticLayerService, input.connectionIds);
  if (warnings.length > 0) {
    // loadVisibleSlRefs only warns when a connection's sources failed to load,
    // which means validRefs is incomplete and cannot justify removals.
    return {
      repairs: [],
      warnings: [
        ...warnings,
        'Skipped wiki sl_refs repair: semantic-layer sources could not be loaded for every connection.',
      ],
    };
  }

  const listFiles =
    typeof input.configService.listFiles === 'function'
      ? input.configService.listFiles.bind(input.configService)
      : null;
  if (!listFiles) {
    return {
      repairs: [],
      warnings: [...warnings, 'Skipped wiki sl_refs repair: config service cannot list wiki files.'],
    };
  }

  const listed = await listFiles('knowledge', true);
  const repairs: WikiSlRefRepair[] = [];

  // Sort a copy: the listing belongs to the file store and must not be mutated.
  // Sorting keeps the order of rewrites and report entries deterministic.
  for (const file of [...listed.files].sort()) {
    const parsedPath = parseKnowledgeFilePath(file);
    if (!parsedPath) {
      continue;
    }

    const page = await input.wikiService.readPage(parsedPath.scope, parsedPath.scopeId, parsedPath.pageKey);
    const refs = uniqueStringArray(page?.frontmatter.sl_refs);
    if (!page || refs.length === 0) {
      continue;
    }

    const keptRefs = refs.filter((ref) => validRefs.has(ref));
    const removedRefs = refs.filter((ref) => !validRefs.has(ref));
    if (removedRefs.length === 0) {
      continue;
    }

    const frontmatter: WikiFrontmatter = {
      ...page.frontmatter,
      sl_refs: keptRefs,
    };
    await input.wikiService.writePage(
      parsedPath.scope,
      parsedPath.scopeId,
      parsedPath.pageKey,
      frontmatter,
      page.content,
      SYSTEM_AUTHOR,
      SYSTEM_EMAIL,
      `Repair semantic-layer refs: ${parsedPath.pageKey}`,
    );
    repairs.push({ ...parsedPath, removedRefs });
  }

  return {
    repairs,
    warnings: [
      ...warnings,
      ...repairs.map(
        (repair) => `Removed invalid sl_refs from ${repair.pageKey}: ${repair.removedRefs.join(', ')}`,
      ),
    ],
  };
}
|
||||
|
|
@ -36,6 +36,7 @@ import { BaseTool, type GitAuthorResolverPort, type ToolContext } from '../tools
|
|||
import {
|
||||
type KnowledgeEventPort,
|
||||
type KnowledgeIndexPort,
|
||||
type KnowledgeIndexPageListing,
|
||||
KnowledgeWikiService,
|
||||
searchLocalKnowledgePages,
|
||||
WikiListTagsTool,
|
||||
|
|
@ -219,7 +220,7 @@ class LocalKnowledgeIndex implements KnowledgeIndexPort {
|
|||
}
|
||||
|
||||
async listPagesForUser(userId: string) {
|
||||
const pages: Array<{ id?: string; page_key: string; summary: string; scope: string; scope_id: string | null }> = [];
|
||||
const pages: KnowledgeIndexPageListing[] = [];
|
||||
for (const scope of [
|
||||
{ scope: 'GLOBAL', scopeId: null, dir: 'knowledge/global' },
|
||||
{ scope: 'USER', scopeId: userId, dir: `knowledge/user/${userId}` },
|
||||
|
|
@ -234,6 +235,7 @@ class LocalKnowledgeIndex implements KnowledgeIndexPort {
|
|||
summary: parsed.summary,
|
||||
scope: scope.scope,
|
||||
scope_id: scope.scopeId,
|
||||
tags: parseWikiTags(raw.content),
|
||||
});
|
||||
}
|
||||
}
|
||||
|
|
@ -433,7 +435,7 @@ class LocalMemoryToolsetFactory implements MemoryToolsetFactoryPort {
|
|||
};
|
||||
},
|
||||
}),
|
||||
new WikiListTagsTool(deps.wikiService, deps.knowledgeIndex),
|
||||
new WikiListTagsTool(deps.knowledgeIndex),
|
||||
new WikiWriteTool(deps.wikiService, deps.knowledgeIndex, deps.knowledgeEvents),
|
||||
new WikiRemoveTool(deps.wikiService, deps.knowledgeIndex, deps.knowledgeEvents),
|
||||
];
|
||||
|
|
@ -468,6 +470,17 @@ function parseWiki(raw: string): { summary: string; content: string } {
|
|||
};
|
||||
}
|
||||
|
||||
/**
 * Reads the `tags` list out of a raw wiki file's leading `---` frontmatter block.
 *
 * Returns an empty list when the text has no frontmatter block or when `tags`
 * is not an array; non-string tag entries are discarded.
 */
function parseWikiTags(raw: string): string[] {
  const frontmatterBlock = /^---\n([\s\S]*?)\n---\n?/.exec(raw);
  if (frontmatterBlock === null) {
    return [];
  }

  // An empty frontmatter block parses to null, hence the `{}` fallback.
  const parsed = (YAML.parse(frontmatterBlock[1]) ?? {}) as Record<string, unknown>;
  const { tags } = parsed;
  if (!Array.isArray(tags)) {
    return [];
  }
  return tags.filter((tag): tag is string => typeof tag === 'string');
}
|
||||
|
||||
function scoreText(text: string, query: string): number {
|
||||
const normalized = query.toLowerCase().trim();
|
||||
if (!normalized) {
|
||||
|
|
|
|||
|
|
@ -12,6 +12,7 @@ export type {
|
|||
KnowledgeEventPort,
|
||||
KnowledgeGitDiffPort,
|
||||
KnowledgeIndexPort,
|
||||
KnowledgeIndexPageListing,
|
||||
UpsertPageParams,
|
||||
WikiFileStorePort,
|
||||
} from './ports.js';
|
||||
|
|
|
|||
|
|
@ -113,13 +113,13 @@ describe('KnowledgeWikiService.syncFromCommit', () => {
|
|||
expect(call.deletes).toEqual([{ scope: 'GLOBAL', scopeId: null, pageKey: 'gone-page' }]);
|
||||
});
|
||||
|
||||
it('indexes historic-SQL nested pages but skips other nested wiki paths from commit sync', async () => {
|
||||
it('indexes only flat wiki pages and skips nested paths from commit sync', async () => {
|
||||
const { service, pagesRepository, gitService, logger } = makeService();
|
||||
|
||||
gitService.diffNameStatus.mockResolvedValue([
|
||||
{ status: 'A', path: 'knowledge/global/revenue-policy.md' },
|
||||
{ status: 'A', path: 'knowledge/global/historic-sql-order-lifecycle.md' },
|
||||
{ status: 'A', path: 'knowledge/global/historic-sql/order-lifecycle.md' },
|
||||
{ status: 'A', path: 'knowledge/global/historic-sql/_archived/retired-pattern.md' },
|
||||
{ status: 'A', path: 'knowledge/global/orbit/company-overview.md' },
|
||||
]);
|
||||
gitService.getFileAtCommit.mockImplementation((path: string) => {
|
||||
|
|
@ -138,26 +138,25 @@ describe('KnowledgeWikiService.syncFromCommit', () => {
|
|||
await service.syncFromCommit('sha-before', 'sha-after', 'run-uuid');
|
||||
|
||||
expect(gitService.getFileAtCommit).not.toHaveBeenCalledWith('knowledge/global/orbit/company-overview.md', 'sha-after');
|
||||
expect(gitService.getFileAtCommit).not.toHaveBeenCalledWith('knowledge/global/historic-sql/order-lifecycle.md', 'sha-after');
|
||||
expect(logger.warn).toHaveBeenCalledWith(
|
||||
'[knowledge.sync] skipping unparseable path: knowledge/global/orbit/company-overview.md',
|
||||
);
|
||||
expect(logger.warn).toHaveBeenCalledWith(
|
||||
'[knowledge.sync] skipping unparseable path: knowledge/global/historic-sql/order-lifecycle.md',
|
||||
);
|
||||
const call = pagesRepository.applyDiffTransactional.mock.calls[0][0];
|
||||
expect(call.upserts).toEqual(
|
||||
expect.arrayContaining([
|
||||
expect.objectContaining({ scope: 'GLOBAL', pageKey: 'revenue-policy', summary: 'revenue' }),
|
||||
expect.objectContaining({
|
||||
scope: 'GLOBAL',
|
||||
pageKey: 'historic-sql/order-lifecycle',
|
||||
pageKey: 'historic-sql-order-lifecycle',
|
||||
summary: 'order lifecycle',
|
||||
}),
|
||||
expect.objectContaining({
|
||||
scope: 'GLOBAL',
|
||||
pageKey: 'historic-sql/_archived/retired-pattern',
|
||||
summary: 'retired',
|
||||
}),
|
||||
]),
|
||||
);
|
||||
expect(call.upserts).toHaveLength(3);
|
||||
expect(call.upserts).toHaveLength(2);
|
||||
});
|
||||
|
||||
it('is a no-op when the diff between shas has no knowledge changes', async () => {
|
||||
|
|
|
|||
|
|
@ -11,10 +11,6 @@ const WIKI_PREFIX = 'knowledge';
|
|||
|
||||
export type { WikiFrontmatter };
|
||||
|
||||
function isHistoricSqlPathSegment(segment: string): boolean {
|
||||
return /^[a-zA-Z0-9_][a-zA-Z0-9_-]*$/.test(segment);
|
||||
}
|
||||
|
||||
export class KnowledgeWikiService {
|
||||
private isWorktreeScoped = false;
|
||||
|
||||
|
|
@ -422,7 +418,6 @@ export class KnowledgeWikiService {
|
|||
* Parse a `knowledge/<scope>/...` file path into its scope and page key.
|
||||
* `knowledge/global/foo.md` → { scope: 'GLOBAL', scopeId: null, pageKey: 'foo' }
|
||||
* `knowledge/user/<id>/bar.md` → { scope: 'USER', scopeId: '<id>', pageKey: 'bar' }
|
||||
* `knowledge/global/historic-sql/foo.md` → { scope: 'GLOBAL', scopeId: null, pageKey: 'historic-sql/foo' }
|
||||
*/
|
||||
function parseKnowledgePath(path: string): { scope: string; scopeId: string | null; pageKey: string } | null {
|
||||
if (!path.endsWith('.md')) {
|
||||
|
|
@ -437,13 +432,6 @@ function parseKnowledgePath(path: string): { scope: string; scopeId: string | nu
|
|||
const pageKey = rest[1].replace(/\.md$/, '');
|
||||
return isFlatWikiKey(pageKey) ? { scope: 'GLOBAL', scopeId: null, pageKey } : null;
|
||||
}
|
||||
if (rest.length >= 3 && rest[0] === 'global' && rest[1] === 'historic-sql') {
|
||||
const historicPath = rest.slice(2).join('/').replace(/\.md$/, '');
|
||||
if (historicPath.split('/').every(isHistoricSqlPathSegment)) {
|
||||
return { scope: 'GLOBAL', scopeId: null, pageKey: `historic-sql/${historicPath}` };
|
||||
}
|
||||
return null;
|
||||
}
|
||||
if (rest.length === 3 && rest[0] === 'user') {
|
||||
const pageKey = rest[2].replace(/\.md$/, '');
|
||||
return isFlatWikiKey(pageKey) ? { scope: 'USER', scopeId: rest[1], pageKey } : null;
|
||||
|
|
|
|||
|
|
@ -244,4 +244,30 @@ describe('local knowledge helpers', () => {
|
|||
}),
|
||||
).rejects.toThrow('Invalid wiki key "orbit/company-overview". Wiki keys must be flat; use "orbit-company-overview".');
|
||||
});
|
||||
|
||||
it('ignores nested historic-SQL legacy paths when listing local knowledge pages', async () => {
  // Supported layout: a flat key written through the knowledge helper.
  await writeLocalKnowledgePage(project, {
    key: 'historic-sql-paid-orders',
    scope: 'GLOBAL',
    summary: 'Flat historic SQL page',
    content: 'Flat page body.',
    tags: ['historic-sql'],
  });

  // Legacy layout: a nested file written directly through the file store.
  const legacyPath = 'knowledge/global/historic-sql/paid-orders.md';
  const legacyRaw = '---\nsummary: Nested historic SQL page\nusage_mode: auto\n---\n\nNested body\n';
  await project.fileStore.writeFile(legacyPath, legacyRaw, 'Test', 'test@example.com', 'Write nested legacy page');

  // Only the flat page may appear in the listing.
  const expectedListing = [
    {
      key: 'historic-sql-paid-orders',
      path: 'knowledge/global/historic-sql-paid-orders.md',
      scope: 'GLOBAL',
      summary: 'Flat historic SQL page',
    },
  ];
  const listing = listLocalKnowledgePages(project, { userId: 'local' });
  await expect(listing).resolves.toEqual(expectedListing);
});
|
||||
});
|
||||
|
|
|
|||
|
|
@ -80,26 +80,12 @@ function knowledgePath(scope: LocalKnowledgeScope, userId: string | undefined, k
|
|||
return `knowledge/user/${assertSafePathToken('user id', userId ?? 'local')}/${safeKey}.md`;
|
||||
}
|
||||
|
||||
function isHistoricSqlPathSegment(segment: string): boolean {
|
||||
return /^[a-zA-Z0-9_][a-zA-Z0-9_-]*$/.test(segment);
|
||||
}
|
||||
|
||||
function keyFromKnowledgePath(path: string, scope: LocalKnowledgeScope, userId: string): string | null {
|
||||
const prefix = scope === 'GLOBAL' ? 'knowledge/global/' : `knowledge/user/${assertSafePathToken('user id', userId)}/`;
|
||||
const key = path.slice(prefix.length).replace(/\.md$/, '');
|
||||
if (isFlatWikiKey(key)) {
|
||||
return key;
|
||||
}
|
||||
if (
|
||||
scope === 'GLOBAL' &&
|
||||
key.startsWith('historic-sql/') &&
|
||||
key
|
||||
.slice('historic-sql/'.length)
|
||||
.split('/')
|
||||
.every(isHistoricSqlPathSegment)
|
||||
) {
|
||||
return key;
|
||||
}
|
||||
return null;
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -13,6 +13,15 @@ export interface UpsertPageParams {
|
|||
sourceRunId?: string | null;
|
||||
}
|
||||
|
||||
/**
 * One row returned by `KnowledgeIndexPort.listPagesForUser`: the index-level
 * view of a wiki page, including its tags so callers do not have to re-read
 * the page to get them.
 */
export interface KnowledgeIndexPageListing {
  /** Index identifier of the page, when the index assigns one. */
  id?: string;
  /** Wiki key of the page within its scope. */
  page_key: string;
  /** Summary text of the page. */
  summary: string;
  /** Scope name, e.g. `'GLOBAL'` or `'USER'`. */
  scope: string;
  /** Owner id for scoped pages; `null` when the scope has no owner (e.g. global pages). */
  scope_id: string | null;
  /** Tags recorded for the page; empty when it has none. */
  tags: string[];
}
|
||||
|
||||
export interface KnowledgeIndexPort {
|
||||
upsertPage(params: UpsertPageParams): Promise<void>;
|
||||
applyDiffTransactional(params: {
|
||||
|
|
@ -32,9 +41,7 @@ export interface KnowledgeIndexPort {
|
|||
scopeId: string | null,
|
||||
pageKey: string,
|
||||
): Promise<{ id?: string; page_key: string } | null | undefined>;
|
||||
listPagesForUser(
|
||||
userId: string,
|
||||
): Promise<Array<{ id?: string; page_key: string; summary: string; scope: string; scope_id: string | null }>>;
|
||||
listPagesForUser(userId: string): Promise<KnowledgeIndexPageListing[]>;
|
||||
getUserPageCount(userId: string): Promise<number>;
|
||||
incrementUsageCount(pageIds: string[]): Promise<void>;
|
||||
searchRRF(
|
||||
|
|
|
|||
|
|
@ -8,22 +8,11 @@ describe('WikiListTagsTool', () => {
|
|||
it("returns distinct sorted tags across the user's visible pages", async () => {
|
||||
const pagesRepository = {
|
||||
listPagesForUser: vi.fn().mockResolvedValue([
|
||||
{ scope: 'GLOBAL', scope_id: null, page_key: 'k1' },
|
||||
{ scope: 'USER', scope_id: 'u', page_key: 'k2' },
|
||||
{ scope: 'GLOBAL', scope_id: null, page_key: 'k1', tags: ['metrics', 'finance'] },
|
||||
{ scope: 'USER', scope_id: 'u', page_key: 'k2', tags: ['metrics'] },
|
||||
]),
|
||||
};
|
||||
const wikiService = {
|
||||
readPage: vi.fn().mockImplementation((_scope, _scopeId, key) => {
|
||||
if (key === 'k1') {
|
||||
return Promise.resolve({ frontmatter: { tags: ['metrics', 'finance'] }, content: '' });
|
||||
}
|
||||
if (key === 'k2') {
|
||||
return Promise.resolve({ frontmatter: { tags: ['metrics'] }, content: '' });
|
||||
}
|
||||
return Promise.resolve(null);
|
||||
}),
|
||||
};
|
||||
const tool = new WikiListTagsTool(wikiService as any, pagesRepository as any);
|
||||
const tool = new WikiListTagsTool(pagesRepository as any);
|
||||
|
||||
const result = await tool.call({}, baseContext);
|
||||
expect(result.markdown).toContain('finance');
|
||||
|
|
@ -31,10 +20,23 @@ describe('WikiListTagsTool', () => {
|
|||
expect(result.structured.tags).toEqual(['finance', 'metrics']);
|
||||
});
|
||||
|
||||
it('lists tags from historic-SQL indexed pages with flat wiki keys', async () => {
  // The tool is built from the index alone, so the stubbed rows carry their own tags.
  const indexedPages = [
    { scope: 'GLOBAL', scope_id: null, page_key: 'company-overview', tags: ['notion'] },
    { scope: 'GLOBAL', scope_id: null, page_key: 'historic-sql-revenue-pattern', tags: ['historic-sql', 'pattern'] },
  ];
  const listPagesForUser = vi.fn().mockResolvedValue(indexedPages);
  const tagsTool = new WikiListTagsTool({ listPagesForUser } as any);

  const { structured } = await tagsTool.call({}, baseContext);

  expect(structured.tags).toEqual(['historic-sql', 'notion', 'pattern']);
});
|
||||
|
||||
it('returns a friendly message when no pages have tags', async () => {
|
||||
const pagesRepository = { listPagesForUser: vi.fn().mockResolvedValue([]) };
|
||||
const wikiService = { readPage: vi.fn() };
|
||||
const tool = new WikiListTagsTool(wikiService as any, pagesRepository as any);
|
||||
const tool = new WikiListTagsTool(pagesRepository as any);
|
||||
|
||||
const result = await tool.call({}, baseContext);
|
||||
expect(result.markdown).toMatch(/no tags/i);
|
||||
|
|
|
|||
|
|
@ -1,7 +1,5 @@
|
|||
import { z } from 'zod';
|
||||
import type { KnowledgeIndexPort } from '../ports.js';
|
||||
type BlockScope = 'GLOBAL' | 'USER';
|
||||
import { KnowledgeWikiService } from '../index.js';
|
||||
import { BaseTool, type ToolContext, type ToolOutput } from '../../tools/index.js';
|
||||
|
||||
const wikiListTagsInputSchema = z.object({});
|
||||
|
|
@ -11,10 +9,7 @@ type WikiListTagsInput = z.infer<typeof wikiListTagsInputSchema>;
|
|||
export class WikiListTagsTool extends BaseTool<typeof wikiListTagsInputSchema> {
|
||||
readonly name = 'wiki_list_tags';
|
||||
|
||||
constructor(
|
||||
private readonly wikiService: KnowledgeWikiService,
|
||||
private readonly pagesRepository: KnowledgeIndexPort,
|
||||
) {
|
||||
constructor(private readonly pagesRepository: KnowledgeIndexPort) {
|
||||
super();
|
||||
}
|
||||
|
||||
|
|
@ -33,10 +28,7 @@ Call before writing a new page so you can reuse existing tags consistently inste
|
|||
const pages = await this.pagesRepository.listPagesForUser(context.userId);
|
||||
const set = new Set<string>();
|
||||
for (const p of pages) {
|
||||
const scope = p.scope as BlockScope;
|
||||
const scopeId = scope === 'USER' ? p.scope_id : null;
|
||||
const page = await this.wikiService.readPage(scope, scopeId, p.page_key);
|
||||
for (const t of page?.frontmatter.tags ?? []) {
|
||||
for (const t of p.tags) {
|
||||
set.add(t);
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -150,6 +150,7 @@ export class WikiWriteTool extends BaseTool<typeof wikiWriteInputSchema> {
|
|||
Create or update a knowledge page. Provide content for create/rewrite, or replacements for targeted edits.
|
||||
For existing pages, you may provide only frontmatter fields such as summary, tags, refs, or sl_refs to update metadata while preserving content.
|
||||
tags/refs/sl_refs use REPLACE semantics: omit to keep existing on update, [] to clear, [values] to set.
|
||||
Keys must be flat file names, not directory paths. Use tags/source frontmatter for grouping.
|
||||
</purpose>`;
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -25,6 +25,7 @@ export interface WikiFrontmatter {
|
|||
usage?: HistoricSqlWikiUsageFrontmatter;
|
||||
fingerprints?: string[];
|
||||
stale_since?: string;
|
||||
archived_since?: string;
|
||||
}
|
||||
|
||||
export interface WikiPage {
|
||||
|
|
|
|||
|
|
@ -687,6 +687,12 @@ class SqlGenerator:
|
|||
if isinstance(node, exp.AggFunc):
|
||||
if isinstance(node, exp.Count):
|
||||
count_arg = node.this
|
||||
if isinstance(count_arg, exp.Star):
|
||||
node.set(
|
||||
"this",
|
||||
_make_case(exp.Literal.number(1)),
|
||||
)
|
||||
return node
|
||||
if (
|
||||
isinstance(count_arg, exp.Distinct)
|
||||
and count_arg.expressions
|
||||
|
|
|
|||
|
|
@ -243,6 +243,37 @@ def test_filtered_count_distinct_keeps_distinct_inside_count():
|
|||
assert_valid_sql(result.sql)
|
||||
|
||||
|
||||
def test_filtered_count_star_uses_case_one_not_case_star():
    """A filtered ``count(*)`` measure must count ``CASE ... THEN 1``, never ``THEN *``."""
    high_risk_measure = {
        "name": "high_risk_account_count",
        "expr": "count(*)",
        "filter": "risk_level = 'high'",
    }
    accounts_source = {
        "name": "accounts",
        "table": "public.accounts",
        "grain": ["id"],
        "columns": [
            {"name": "id", "type": "number"},
            {"name": "risk_level", "type": "string"},
        ],
        "measures": [high_risk_measure],
    }
    engine = make_engine({"accounts": accounts_source})

    request = {"measures": ["accounts.high_risk_account_count"], "dimensions": []}
    result = engine.query(request)

    # The star has to be replaced by a literal inside the CASE expression.
    assert "THEN *" not in result.sql
    assert "COUNT(CASE WHEN accounts.risk_level = 'high' THEN 1 END)" in result.sql
    assert_valid_sql(result.sql)
|
||||
|
||||
|
||||
def test_predefined_measure_via_alias_uses_real_table_and_alias_qualification():
|
||||
engine = make_engine(_alias_measure_sources())
|
||||
result = engine.query(
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue