mirror of
https://github.com/Kaelio/ktx.git
synced 2026-06-19 08:28:06 +02:00
Initial open-source release
This commit is contained in:
commit
1a42152e6f
1199 changed files with 257054 additions and 0 deletions
13
packages/context/src/tools/authors.ts
Normal file
13
packages/context/src/tools/authors.ts
Normal file
|
|
@ -0,0 +1,13 @@
|
|||
/** Name/e-mail pair describing a git author. */
export interface GitAuthor {
  /** Author display name. */
  name: string;
  /** Author e-mail address. */
  email: string;
}
|
||||
|
||||
/** Port that maps a user id to the {@link GitAuthor} to attribute work to. */
export interface GitAuthorResolverPort {
  /**
   * Resolves the author for `userId`.
   *
   * @param userId - User identifier; `null` and `undefined` are accepted by the signature.
   * @returns A promise of the resolved author.
   */
  resolve(userId: string | null | undefined): Promise<GitAuthor>;
}
|
||||
|
||||
/** Placeholder author identity ("System User"). */
export const SYSTEM_GIT_AUTHOR: GitAuthor = { name: 'System User', email: 'system@example.com' };
|
||||
174
packages/context/src/tools/base-tool.ts
Normal file
174
packages/context/src/tools/base-tool.ts
Normal file
|
|
@ -0,0 +1,174 @@
|
|||
import { tool } from 'ai';
|
||||
import { z, type ZodType } from 'zod';
|
||||
import { noopLogger, type KloLogger } from '../core/index.js';
|
||||
import type { IngestToolMetadata, ToolSession } from './tool-session.js';
|
||||
|
||||
/**
 * Result shape returned by tools: a markdown rendering plus a structured payload.
 *
 * @typeParam T - Type of the structured payload.
 */
export interface ToolOutput<T = unknown> {
  /** Markdown rendering of the result. */
  markdown: string;
  /** Machine-readable payload. */
  structured: T;
}
|
||||
|
||||
/** Port notified when a tool execution starts and ends. */
export interface ToolTimingTrackerPort {
  /** Called right before a tool call begins executing. */
  recordToolExecutionStart(messageId: string, toolName: string, toolCallId: string): void;

  /**
   * Called once a tool call has finished.
   *
   * @param state - Outcome label for the call.
   */
  recordToolExecutionEnd(messageId: string, toolName: string, toolCallId: string, state: string): void;
}
|
||||
|
||||
/** Port through which a tool can emit progress events; the event payload is opaque here. */
export interface ToolProgressRelayPort {
  /** Emits a single progress event. */
  emit(event: unknown): void;
}
|
||||
|
||||
/** Labels accepted by {@link ToolContext.source}. */
type ChatSource =
  | 'RESEARCH' | 'DASHBOARD' | 'WIDGET_CONFIG' | 'EVALUATION'
  | 'METRIC_WORKSHOP' | 'INPUT_CONFIG' | 'SCHEDULED_RESEARCH' | 'DASHBOARD_GENERATION';
|
||||
|
||||
/**
 * Per-invocation context handed to every tool's `call`.
 *
 * Only `sourceId`, `messageId` and `userId` are required; everything else is optional and
 * populated by whichever caller needs it.
 */
export interface ToolContext {
  sourceId: string;
  /** Message the tool call belongs to; forwarded to the timing tracker. */
  messageId: string;
  /** Authenticated user. Tool execution is rejected when this is empty. */
  userId: string;
  userRoles?: string[];
  authToken?: string;
  /** Text of the current user message, if known. */
  currentUserMessage?: string;
  /** Id of the current tool call; set on a per-call copy of the context during execution. */
  toolCallId?: string;
  toolCallHistory?: string[];
  /** Optional sink for tool execution start/end notifications. */
  timingTracker?: ToolTimingTrackerPort;
  source?: ChatSource;
  dashboardId?: string;
  methodologyEntries?: MethodologyEntry[];
  progressRelay?: ToolProgressRelayPort;
  connectionId?: string;
  ingest?: IngestToolMetadata;
  /**
   * Per-session state (ingest WU, memory-agent post-turn). When present, SL/wiki
   * tools use session-scoped services and emit touched-set entries instead of
   * writing to shared indexes immediately. Non-session callers leave this unset.
   */
  session?: ToolSession;
  currentDefinition?: {
    sql: string;
    measures: unknown[];
    dimensions: unknown[];
    parameters: unknown[];
    segments: unknown[];
    name?: string;
    description?: string;
  };
}
|
||||
|
||||
/** One recorded tool invocation: which tool, a label, its arguments and an optional result. */
export interface MethodologyEntry {
  key: string;
  /** Name of the tool the entry refers to. */
  toolName: string;
  label: string;
  /** Arguments associated with the entry. */
  args: Record<string, unknown>;
  /** Result, when one is available. */
  result?: unknown;
}
|
||||
|
||||
/**
 * Base class for tools. Subclasses provide a `name`, a `description`, a Zod `inputSchema`
 * and a `call` implementation; this class adapts them to the Anthropic tool format and to
 * an AI SDK `tool()` definition.
 *
 * SECURITY: All tools require authentication. userId must always be provided in ToolContext.
 */
export abstract class BaseTool<TInput extends ZodType = ZodType> {
  protected readonly logger: KloLogger;

  /** Tool name as exposed to the model. */
  abstract readonly name: string;

  /**
   * @param logger - Logger used for execution failures and output warnings; defaults to a no-op.
   */
  constructor(logger: KloLogger = noopLogger) {
    this.logger = logger;
  }

  /** Human/model readable description of the tool. */
  abstract get description(): string;

  /** Zod schema that validates the tool input. */
  abstract get inputSchema(): TInput;

  /** Runs the tool with already-validated input. */
  abstract call(input: z.infer<TInput>, context: ToolContext): Promise<any>;

  /** Converts {@link inputSchema} to a draft-7 JSON Schema object. */
  getParametersSchema(): {
    type: 'object';
    properties: Record<string, any>;
    required?: string[];
  } {
    const jsonSchema = z.toJSONSchema(this.inputSchema, {
      target: 'draft-7',
    });

    return jsonSchema as any;
  }

  /** Describes the tool as `{ name, description, input_schema }`. */
  toAnthropicFormat(): {
    name: string;
    description: string;
    input_schema: {
      type: 'object';
      properties: Record<string, any>;
      required?: string[];
    };
  } {
    return {
      name: this.name,
      description: this.description,
      input_schema: this.getParametersSchema(),
    };
  }

  /**
   * Wraps this tool as an AI SDK tool bound to `context`.
   *
   * On execution the wrapper copies the context with the current `toolCallId`, notifies the
   * timing tracker (when present), rejects calls without a `userId`, validates the input with
   * {@link parseInput} and delegates to {@link call}. Errors are logged and rethrown.
   *
   * Only the `markdown` field of a `{ markdown, structured }` result is sent to the model.
   * Any other non-string result is logged and serialised: objects as JSON, everything else
   * via `String()`.
   */
  toAiSdkTool(context: ToolContext): any {
    const toolName = this.name;
    const logger = this.logger;

    return tool({
      description: this.description,
      inputSchema: this.inputSchema,
      execute: async (params, { toolCallId }) => {
        // Create context copy with current toolCallId (safe for parallel execution)
        const callContext = { ...context, toolCallId };

        // Record tool execution start (input generation has already been tracked via onChunk)
        if (callContext.timingTracker && toolCallId) {
          callContext.timingTracker.recordToolExecutionStart(callContext.messageId, toolName, toolCallId);
        }

        let state = 'completed';
        try {
          if (!callContext.userId) {
            throw new Error('Authentication required: userId must be provided in ToolContext');
          }
          const parsedInput = this.parseInput(params as Record<string, any>);
          return await this.call(parsedInput, callContext);
        } catch (error) {
          state = 'error';
          logger.error(
            `Tool ${toolName} execution failed: ${error instanceof Error ? error.message : String(error)}`,
          );
          throw error;
        } finally {
          // Record tool execution end, whatever the outcome
          if (callContext.timingTracker && toolCallId) {
            callContext.timingTracker.recordToolExecutionEnd(callContext.messageId, toolName, toolCallId, state);
          }
        }
      },
      // Send only markdown to LLM - frontend still receives full { markdown, structured } via stream
      toModelOutput: ({ output }) => {
        if (output && typeof output === 'object' && 'markdown' in output) {
          return { type: 'content', value: [{ type: 'text', text: output.markdown as string }] };
        }
        if (typeof output !== 'string') {
          logger.warn(`Tool ${toolName} returned unexpected output type: ${typeof output}. Coercing to string.`);
        }
        return { type: 'content', value: [{ type: 'text', text: BaseTool.stringifyOutput(output) }] };
      },
    });
  }

  /** Validates `input` against {@link inputSchema}; throws when validation fails. */
  parseInput(input: Record<string, any>): z.infer<TInput> {
    return this.inputSchema.parse(input);
  }

  /** Returns the current user message from the context, or `null` when absent. */
  protected getCurrentUserQuery(context: ToolContext): string | null {
    return context.currentUserMessage ?? null;
  }

  /**
   * Renders an arbitrary tool result as text for the model. Objects are serialised as JSON
   * because `String(object)` would yield the useless "[object Object]"; values JSON cannot
   * represent (circular structures, BigInt members) fall back to `String()`.
   */
  private static stringifyOutput(output: unknown): string {
    if (typeof output === 'string') {
      return output;
    }
    if (output !== null && typeof output === 'object') {
      try {
        const json: unknown = JSON.stringify(output);
        if (typeof json === 'string') {
          return json;
        }
      } catch {
        // Not JSON-serialisable; fall through to String().
      }
    }
    return String(output);
  }
}
|
||||
64
packages/context/src/tools/context-candidate-mark.tool.ts
Normal file
64
packages/context/src/tools/context-candidate-mark.tool.ts
Normal file
|
|
@ -0,0 +1,64 @@
|
|||
import { z } from 'zod';
|
||||
import { BaseTool, type ToolContext, type ToolOutput } from './base-tool.js';
|
||||
import type { ContextEvidenceToolStorePort } from './context-evidence-tool-store.js';
|
||||
import { ingestMetadataRequired, resolveIngestMetadata, type ToolFailure } from './context-ingest-metadata.js';
|
||||
|
||||
/** Statuses a candidate can be marked with. */
const CONTEXT_CANDIDATE_MARK_STATUSES = ['pending', 'promoted', 'merged', 'rejected', 'conflict'] as const;

/** Input accepted by `context_candidate_mark`. */
const contextCandidateMarkInputSchema = z.object({
  // Key of the candidate to update.
  candidateKey: z.string().min(1),
  // New status for the candidate.
  status: z.enum(CONTEXT_CANDIDATE_MARK_STATUSES),
  // Optional free-text reason (max 500 chars); defaults to null when omitted.
  rejectionReason: z.string().max(500).nullable().default(null),
});

type ContextCandidateMarkInput = z.infer<typeof contextCandidateMarkInputSchema>;
|
||||
|
||||
/** Structured payload returned by `context_candidate_mark`. */
interface ContextCandidateMarkStructured {
  /** Whether the candidate was updated. */
  success: boolean;
  /** Error code when `success` is false. */
  error?: string;
  candidateKey?: string;
  /** Status of the candidate after the update. */
  status?: string;
}
|
||||
|
||||
/** Tool that updates the status of a context knowledge candidate within the current ingest run. */
export class ContextCandidateMarkTool extends BaseTool<typeof contextCandidateMarkInputSchema> {
  readonly name = 'context_candidate_mark';

  /**
   * @param store - Store used to update the candidate status.
   */
  constructor(private readonly store: ContextEvidenceToolStorePort) {
    super();
  }

  get description(): string {
    return 'Mark a context knowledge candidate after curator reconciliation promotes, merges, rejects, or keeps it as a conflict.';
  }

  get inputSchema() {
    return contextCandidateMarkInputSchema;
  }

  /**
   * Updates the candidate identified by `input.candidateKey` in the current run.
   *
   * @returns The ingest-metadata failure when no ingest metadata resolves, a
   *   `CANDIDATE_NOT_FOUND` result when the store returns nothing, otherwise the stored
   *   key and status.
   */
  async call(
    input: ContextCandidateMarkInput,
    context: ToolContext,
  ): Promise<ToolOutput<ContextCandidateMarkStructured | ToolFailure>> {
    const ingest = resolveIngestMetadata(context);
    if (!ingest) {
      return ingestMetadataRequired();
    }

    const { candidateKey, status, rejectionReason } = input;
    const row = await this.store.updateCandidateStatus({
      runId: ingest.runId,
      candidateKey,
      status,
      rejectionReason,
    });

    if (row) {
      return {
        markdown: `Candidate "${row.candidate_key}" marked ${row.status}.`,
        structured: { success: true, candidateKey: row.candidate_key, status: row.status },
      };
    }

    return {
      markdown: `No candidate found with key "${candidateKey}".`,
      structured: { success: false, error: 'CANDIDATE_NOT_FOUND', candidateKey },
    };
  }
}
|
||||
179
packages/context/src/tools/context-candidate-write.tool.ts
Normal file
179
packages/context/src/tools/context-candidate-write.tool.ts
Normal file
|
|
@ -0,0 +1,179 @@
|
|||
import { createHash } from 'node:crypto';
|
||||
import { z } from 'zod';
|
||||
import type { KloEmbeddingPort } from '../core/index.js';
|
||||
import { buildContextCandidateEmbeddingText } from '../ingest/context-candidates/index.js';
|
||||
import { BaseTool, type ToolContext, type ToolOutput } from './base-tool.js';
|
||||
import { chunkIdSchema } from './context-evidence-ids.js';
|
||||
import type { ContextEvidenceToolStorePort } from './context-evidence-tool-store.js';
|
||||
import { ingestMetadataRequired, resolveIngestMetadata, type ToolFailure } from './context-ingest-metadata.js';
|
||||
|
||||
/** Integer score in the inclusive range 0..3. */
const scoreSchema = z.number().int().min(0).max(3);

/** Input accepted by `context_candidate_write`. */
const contextCandidateWriteInputSchema = z.object({
  // Identification and content of the candidate.
  candidateKey: z.string().min(1).max(160),
  topic: z.string().min(1).max(200),
  assertion: z.string().min(1).max(500),
  rationale: z.string().min(1).max(1000),

  // Evidence backing the candidate; at least one chunk id is required.
  evidenceChunkIds: z.array(chunkIdSchema).min(1),

  // Optional target page and the extractor's suggested action.
  suggestedPageKey: z.string().min(1).max(120).optional(),
  actionHint: z.enum(['create', 'update', 'merge', 'conflict', 'skip']),

  // Scoring dimensions, each 0..3.
  durabilityScore: scoreSchema,
  authorityScore: scoreSchema,
  reuseScore: scoreSchema,
  noveltyScore: scoreSchema,
  riskScore: scoreSchema,
});

type ContextCandidateWriteInput = z.infer<typeof contextCandidateWriteInputSchema>;
|
||||
|
||||
/** Structured payload returned by `context_candidate_write`. */
interface ContextCandidateWriteStructured {
  /** Whether the candidate was saved. */
  success: boolean;
  /** Error code when `success` is false. */
  error?: string;
  /** Detail accompanying `error`. */
  message?: string;
  candidateKey?: string;
  promotionScore?: number;
  status?: string;
}
|
||||
|
||||
/**
 * Tool that stores a knowledge candidate backed by indexed evidence chunks for the current
 * ingest run.
 */
export class ContextCandidateWriteTool extends BaseTool<typeof contextCandidateWriteInputSchema> {
  readonly name = 'context_candidate_write';

  /**
   * @param store - Store used to read evidence chunks and insert the candidate.
   * @param embeddingService - Service used to embed the candidate text.
   */
  constructor(
    private readonly store: ContextEvidenceToolStorePort,
    private readonly embeddingService: Pick<KloEmbeddingPort, 'computeEmbedding'>,
  ) {
    super();
  }

  get description(): string {
    return 'Write a durable knowledge candidate from indexed context evidence. Use this during ingest candidate extraction instead of wiki_write.';
  }

  get inputSchema() {
    return contextCandidateWriteInputSchema;
  }

  /**
   * Validates the evidence, scores the candidate and inserts it.
   *
   * Failure results (never thrown): missing ingest metadata, `CONNECTION_REQUIRED`,
   * `EVIDENCE_REQUIRED`, `EVIDENCE_NOT_FOUND` (listing the ids that could not be read) and
   * `CANDIDATE_WRITE_FAILED` when the insert throws.
   *
   * Repeated ids in `input.evidenceChunkIds` are collapsed before lookup, so a duplicate is
   * not misreported as a missing chunk and is stored only once.
   */
  async call(
    input: ContextCandidateWriteInput,
    context: ToolContext,
  ): Promise<ToolOutput<ContextCandidateWriteStructured | ToolFailure>> {
    const ingest = resolveIngestMetadata(context);
    if (!ingest) {
      return ingestMetadataRequired();
    }

    const connectionId = context.connectionId ?? context.session?.connectionId;
    if (!connectionId) {
      return {
        markdown: 'Error: no connectionId is available for candidate write.',
        structured: {
          success: false,
          error: 'CONNECTION_REQUIRED',
          message: 'Run this inside an ingest session with a connectionId.',
        },
      };
    }

    // Collapse repeated ids (first occurrence wins) so the lookup result can be compared
    // against the set of ids that were actually requested.
    const evidenceChunkIds = [...new Set(input.evidenceChunkIds)];
    if (evidenceChunkIds.length === 0) {
      return {
        markdown: 'Error: candidates require at least one evidence chunk.',
        structured: { success: false, error: 'EVIDENCE_REQUIRED', message: 'Provide one or more evidenceChunkIds.' },
      };
    }

    const chunks = await this.store.readChunksByIds(evidenceChunkIds, connectionId, ingest.sourceKey, ingest.runId);
    const found = new Set(chunks.map((chunk) => chunk.chunkId));
    const missing = evidenceChunkIds.filter((id) => !found.has(id));
    if (missing.length > 0) {
      return {
        markdown: `Error: evidence chunks not found or not visible: ${missing.join(', ')}`,
        structured: {
          success: false,
          error: 'EVIDENCE_NOT_FOUND',
          message: `Missing evidence chunk ids: ${missing.join(', ')}`,
        },
      };
    }

    // Risk counts against promotion; the other four dimensions count towards it.
    const promotionScore =
      input.durabilityScore + input.authorityScore + input.reuseScore + input.noveltyScore - input.riskScore;
    const status = input.actionHint === 'conflict' ? 'conflict' : input.actionHint === 'skip' ? 'rejected' : 'pending';
    const evidenceRefs = chunks.map((chunk) => ({
      chunkId: chunk.chunkId,
      stableCitationKey: chunk.stableCitationKey,
      syncId: chunk.syncId,
      rawPath: chunk.rawPath,
      title: chunk.title,
      path: chunk.path,
      url: chunk.url,
      lastEditedAt: chunk.lastEditedAt?.toISOString() ?? null,
      // SHA-256 of the chunk content as read at write time.
      snippetHash: createHash('sha256').update(chunk.content).digest('hex'),
      citation: chunk.citation,
    }));
    const embedding = await this.computeCandidateEmbedding(input);

    try {
      const candidate = await this.store.insertCandidate({
        runId: ingest.runId,
        connectionId,
        sourceKey: ingest.sourceKey,
        candidateKey: input.candidateKey,
        topic: input.topic,
        assertion: input.assertion,
        rationale: input.rationale,
        evidenceChunkIds,
        evidenceRefs,
        suggestedPageKey: input.suggestedPageKey ?? null,
        actionHint: input.actionHint,
        durabilityScore: input.durabilityScore,
        authorityScore: input.authorityScore,
        reuseScore: input.reuseScore,
        noveltyScore: input.noveltyScore,
        riskScore: input.riskScore,
        promotionScore,
        status,
        rejectionReason: input.actionHint === 'skip' ? 'Extractor marked this candidate as skip.' : null,
        embedding,
      });

      return {
        markdown: `Candidate "${candidate.candidate_key}" saved with promotion score ${candidate.promotion_score}.`,
        structured: {
          success: true,
          candidateKey: candidate.candidate_key,
          promotionScore: candidate.promotion_score,
          status: candidate.status,
        },
      };
    } catch (error) {
      return {
        markdown: `Error: candidate "${input.candidateKey}" could not be saved.`,
        structured: {
          success: false,
          error: 'CANDIDATE_WRITE_FAILED',
          message: error instanceof Error ? error.message : String(error),
        },
      };
    }
  }

  /**
   * Embeds the candidate's topic/assertion text. Best effort: a failure is logged as a
   * warning and `null` is returned so the candidate can still be written.
   */
  private async computeCandidateEmbedding(
    input: Pick<ContextCandidateWriteInput, 'topic' | 'assertion'>,
  ): Promise<number[] | null> {
    try {
      return await this.embeddingService.computeEmbedding(buildContextCandidateEmbeddingText(input));
    } catch (error) {
      this.logger.warn(
        `Candidate embedding generation failed for topic "${input.topic}": ${
          error instanceof Error ? error.message : String(error)
        }`,
      );
      return null;
    }
  }
}
|
||||
16
packages/context/src/tools/context-evidence-ids.ts
Normal file
16
packages/context/src/tools/context-evidence-ids.ts
Normal file
|
|
@ -0,0 +1,16 @@
|
|||
import { z } from 'zod';
|
||||
|
||||
/** Character class for one hexadecimal digit (either case). */
const HEX_DIGIT = '[0-9a-fA-F]';

/** Regex source for a UUID body: hex groups of 8-4-4-4-12 digits joined by hyphens. */
const UUID_BODY = [8, 4, 4, 4, 12].map((length) => `${HEX_DIGIT}{${length}}`).join('-');

/** Builds an anchored pattern matching `<prefix>-<uuid>` and nothing else. */
const buildPrefixedUuidPattern = (prefix: string): RegExp => new RegExp(`^${prefix}-${UUID_BODY}$`);

const CHUNK_ID_PATTERN = buildPrefixedUuidPattern('ctxchunk');
const DOCUMENT_ID_PATTERN = buildPrefixedUuidPattern('ctxdoc');
|
||||
|
||||
/** Validates a chunk id of the form `ctxchunk-<uuid>`. */
export const chunkIdSchema = z
  .string()
  .regex(
    CHUNK_ID_PATTERN,
    // Shown when the id does not match; points the caller at where valid ids come from.
    'Use a chunkId returned by context_evidence_search (format: "ctxchunk-<uuid>").',
  )
  .describe('A chunkId from context_evidence_search results, e.g. "ctxchunk-<uuid>".');
|
||||
|
||||
/** Validates a document id of the form `ctxdoc-<uuid>`. */
export const documentIdSchema = z
  .string()
  .regex(
    DOCUMENT_ID_PATTERN,
    // Shown when the id does not match; points the caller at where valid ids come from.
    'Use a documentId returned by context_evidence_search or context_evidence_neighbors (format: "ctxdoc-<uuid>").',
  )
  .describe('A documentId from context_evidence_search or context_evidence_neighbors results, e.g. "ctxdoc-<uuid>".');
|
||||
|
|
@ -0,0 +1,99 @@
|
|||
import { z } from 'zod';
|
||||
import { BaseTool, type ToolContext, type ToolOutput } from './base-tool.js';
|
||||
import { documentIdSchema } from './context-evidence-ids.js';
|
||||
import type { ContextEvidenceToolStorePort } from './context-evidence-tool-store.js';
|
||||
import { ingestMetadataRequired, resolveIngestMetadata, type ToolFailure } from './context-ingest-metadata.js';
|
||||
|
||||
/** Input accepted by `context_evidence_neighbors`. */
const contextEvidenceNeighborsInputSchema = z.object({
  // Document whose neighbors are requested.
  documentId: documentIdSchema,
  // Kind of relationship to follow.
  relation: z.enum(['parent', 'children', 'linked', 'backlinked', 'same_path']),
  // Maximum number of neighbors (1..25); defaults to 10.
  limit: z.number().int().min(1).max(25).default(10),
});

type ContextEvidenceNeighborsInput = z.infer<typeof contextEvidenceNeighborsInputSchema>;
|
||||
|
||||
/** Structured payload returned by a successful `context_evidence_neighbors` call. */
interface ContextEvidenceNeighborsStructured {
  success: true;
  /** Neighbor documents, with `lastEditedAt` as an ISO string or null. */
  results: {
    documentId: string;
    externalId: string;
    title: string;
    path: string;
    relation: string;
    url: string | null;
    lastEditedAt: string | null;
  }[];
  /** Number of entries in `results`. */
  totalFound: number;
}
|
||||
|
||||
/** Tool that lists evidence documents related to a given document in the current ingest source. */
export class ContextEvidenceNeighborsTool extends BaseTool<typeof contextEvidenceNeighborsInputSchema> {
  readonly name = 'context_evidence_neighbors';

  /**
   * @param store - Store queried for neighbor documents.
   */
  constructor(private readonly store: ContextEvidenceToolStorePort) {
    super();
  }

  get description(): string {
    return 'Find parent, child, linked, backlinked, or same-folder evidence documents for the current ingest source.';
  }

  get inputSchema() {
    return contextEvidenceNeighborsInputSchema;
  }

  /**
   * Looks up neighbors of `input.documentId` for the requested relation.
   *
   * @returns The ingest-metadata failure, a `CONNECTION_REQUIRED` failure, or a success
   *   payload whose markdown is a numbered list of the neighbors found.
   */
  async call(
    input: ContextEvidenceNeighborsInput,
    context: ToolContext,
  ): Promise<ToolOutput<ContextEvidenceNeighborsStructured | ToolFailure>> {
    const ingest = resolveIngestMetadata(context);
    if (!ingest) {
      return ingestMetadataRequired();
    }

    // The connection comes from the context itself or, failing that, from the session.
    const connectionId = context.connectionId ?? context.session?.connectionId;
    if (!connectionId) {
      return {
        markdown: 'Error: no connectionId is available for context evidence neighbors.',
        structured: {
          success: false,
          error: 'CONNECTION_REQUIRED',
          message: 'Run this inside an ingest session with a connectionId.',
        },
      };
    }

    const { documentId, relation, limit } = input;
    const neighbors = await this.store.findNeighborDocuments({
      connectionId,
      sourceKey: ingest.sourceKey,
      documentId,
      relation,
      limit,
      currentRunId: ingest.runId,
    });

    const total = neighbors.length;
    if (total === 0) {
      return {
        markdown: `No ${relation} evidence documents found.`,
        structured: { success: true, results: [], totalFound: 0 },
      };
    }

    const listing = neighbors.map(
      (doc, position) => `${position + 1}. **${doc.title}** (${doc.path}) documentId=${doc.documentId}`,
    );
    // Dates are serialised to ISO strings for the structured payload.
    const serialised = neighbors.map((doc) => ({
      ...doc,
      lastEditedAt: doc.lastEditedAt?.toISOString() ?? null,
    }));

    return {
      markdown: [`Found ${total} ${relation} evidence document(s):`, '', ...listing].join('\n'),
      structured: {
        success: true,
        totalFound: total,
        results: serialised,
      },
    };
  }
}
|
||||
153
packages/context/src/tools/context-evidence-read.tool.ts
Normal file
153
packages/context/src/tools/context-evidence-read.tool.ts
Normal file
|
|
@ -0,0 +1,153 @@
|
|||
import { z } from 'zod';
|
||||
import { BaseTool, type ToolContext, type ToolOutput } from './base-tool.js';
|
||||
import { chunkIdSchema, documentIdSchema } from './context-evidence-ids.js';
|
||||
import type { ContextEvidenceToolStorePort } from './context-evidence-tool-store.js';
|
||||
import { ingestMetadataRequired, resolveIngestMetadata, type ToolFailure } from './context-ingest-metadata.js';
|
||||
|
||||
/**
 * Input accepted by `context_evidence_read`: exactly one of `chunkId`, `documentId` or
 * `externalId`, plus an optional flag to include the rest of the document's chunks.
 */
const contextEvidenceReadInputSchema = z
  .object({
    chunkId: chunkIdSchema.optional(),
    documentId: documentIdSchema.optional(),
    externalId: z.string().min(1).optional(),
    includeNeighborChunks: z.boolean().default(false),
  })
  .refine(
    ({ chunkId, documentId, externalId }) => {
      // Count the selectors that were actually supplied.
      const supplied = [chunkId, documentId, externalId].filter(Boolean);
      return supplied.length === 1;
    },
    {
      message: 'Provide exactly one of chunkId, documentId, or externalId.',
    },
  );

type ContextEvidenceReadInput = z.infer<typeof contextEvidenceReadInputSchema>;
|
||||
|
||||
/** Structured payload returned by a successful `context_evidence_read` call. */
interface ContextEvidenceReadStructured {
  success: true;
  /** False when nothing matched; the remaining fields are then omitted. */
  found: boolean;
  documentId?: string;
  /** Set only when a chunk was read. */
  chunkId?: string;
  externalId?: string;
  title?: string;
  path?: string;
  url?: string | null;
  /** Chunk content, or the document's chunk contents joined by blank lines. */
  content?: string;
  /** Citation of the chunk that was read, when available. */
  citation?: unknown;
}
|
||||
|
||||
/** Tool that reads one evidence chunk or one evidence document for the current ingest source. */
export class ContextEvidenceReadTool extends BaseTool<typeof contextEvidenceReadInputSchema> {
  readonly name = 'context_evidence_read';

  /**
   * @param store - Store used to read chunks and documents.
   */
  constructor(private readonly store: ContextEvidenceToolStorePort) {
    super();
  }

  get description(): string {
    return 'Read a context evidence chunk or document by chunkId, documentId, or externalId.';
  }

  get inputSchema() {
    return contextEvidenceReadInputSchema;
  }

  /**
   * Reads evidence by whichever selector is present, checked in the order `chunkId`,
   * `documentId`, `externalId`.
   *
   * For a chunk read with `includeNeighborChunks`, the content is the whole document's chunks
   * joined by blank lines (falling back to the single chunk when the document cannot be read).
   *
   * @returns The ingest-metadata failure, a `CONNECTION_REQUIRED` failure, a
   *   `found: false` payload, or the evidence content and metadata.
   */
  async call(
    input: ContextEvidenceReadInput,
    context: ToolContext,
  ): Promise<ToolOutput<ContextEvidenceReadStructured | ToolFailure>> {
    const ingest = resolveIngestMetadata(context);
    if (!ingest) {
      return ingestMetadataRequired();
    }

    // Every read path needs a connection, so resolve it once up front.
    const connectionId = context.connectionId ?? context.session?.connectionId;
    if (!connectionId) {
      return {
        markdown: 'Error: no connectionId is available for evidence read.',
        structured: { success: false, error: 'CONNECTION_REQUIRED', message: 'Run inside an ingest session.' },
      };
    }

    const { sourceKey, runId } = ingest;

    if (input.chunkId) {
      const hit = await this.store.readChunkById(input.chunkId, connectionId, sourceKey, runId);
      if (!hit) {
        return {
          markdown: `No evidence chunk found for ${input.chunkId}.`,
          structured: { success: true, found: false },
        };
      }

      let content = hit.chunk.content;
      if (input.includeNeighborChunks) {
        const parent = await this.store.readDocumentById(hit.document.id, connectionId, sourceKey, runId);
        content = parent?.chunks.map((chunk) => chunk.content).join('\n\n') ?? hit.chunk.content;
      }

      return {
        markdown: `## ${hit.document.title}\n\n${content}`,
        structured: {
          success: true,
          found: true,
          documentId: hit.document.id,
          chunkId: hit.chunk.id,
          externalId: hit.document.external_id,
          title: hit.document.title,
          path: hit.document.path,
          url: hit.document.url,
          content,
          citation: hit.chunk.citation,
        },
      };
    }

    const documentNotFound: ToolOutput<ContextEvidenceReadStructured> = {
      markdown: 'No evidence document found.',
      structured: { success: true, found: false },
    };

    let document: Awaited<ReturnType<ContextEvidenceToolStorePort['readDocumentById']>>;
    if (input.documentId) {
      document = await this.store.readDocumentById(input.documentId, connectionId, sourceKey, runId);
    } else if (input.externalId) {
      document = await this.store.readDocumentByExternalId(connectionId, sourceKey, input.externalId, runId);
    } else {
      // No selector at all: the schema refinement normally prevents this.
      return documentNotFound;
    }

    if (!document) {
      return documentNotFound;
    }

    const { document: meta, chunks } = document;
    const content = chunks.map((chunk) => chunk.content).join('\n\n');
    return {
      markdown: `## ${meta.title}\n\n${content}`,
      structured: {
        success: true,
        found: true,
        documentId: meta.id,
        externalId: meta.external_id,
        title: meta.title,
        path: meta.path,
        url: meta.url,
        content,
      },
    };
  }
}
|
||||
142
packages/context/src/tools/context-evidence-search.tool.ts
Normal file
142
packages/context/src/tools/context-evidence-search.tool.ts
Normal file
|
|
@ -0,0 +1,142 @@
|
|||
import { z } from 'zod';
|
||||
import type { KloEmbeddingPort } from '../core/index.js';
|
||||
import { BaseTool, type ToolContext, type ToolOutput } from './base-tool.js';
|
||||
import type { ContextEvidenceToolStorePort } from './context-evidence-tool-store.js';
|
||||
import { ingestMetadataRequired, resolveIngestMetadata, type ToolFailure } from './context-ingest-metadata.js';
|
||||
|
||||
/** Input accepted by `context_evidence_search`. */
const contextEvidenceSearchInputSchema = z.object({
  // Free-text query; must be non-empty.
  query: z.string().min(1),
  // Optional overrides for the connection and source to search.
  connectionId: z.string().uuid().optional(),
  sourceKey: z.string().min(1).optional(),
  // Maximum number of results (1..25); defaults to 10.
  limit: z.number().int().min(1).max(25).default(10),
  // Forwarded to the store as-is; defaults to false.
  includeDeleted: z.boolean().default(false),
});

type ContextEvidenceSearchInput = z.infer<typeof contextEvidenceSearchInputSchema>;
|
||||
|
||||
/** Structured payload returned by a successful `context_evidence_search` call. */
interface ContextEvidenceSearchStructured {
  success: true;
  /** Matching chunks, with `lastEditedAt` as an ISO string or null. */
  results: {
    chunkId: string;
    documentId: string;
    externalId: string;
    title: string;
    path: string;
    url: string | null;
    snippet: string;
    score: number;
    matchReasons?: string[];
    /** Per-lane search summaries, when the store provides them. */
    lanes?: {
      lane: string;
      status: 'available' | 'skipped' | 'failed';
      requestedCandidatePoolLimit: number;
      effectiveCandidatePoolLimit: number;
      returnedCandidateCount: number;
      weight: number;
      reason?: string;
    }[];
    citation: unknown;
    stableCitationKey: string;
    syncId: string;
    lastEditedAt: string | null;
  }[];
  /** Number of entries in `results`. */
  totalFound: number;
}
|
||||
|
||||
/** Tool that searches the context evidence index for the current ingest source. */
export class ContextEvidenceSearchTool extends BaseTool<typeof contextEvidenceSearchInputSchema> {
  readonly name = 'context_evidence_search';

  /**
   * @param store - Store that executes the search.
   * @param embeddingService - Service used to embed the query text.
   */
  constructor(
    private readonly store: ContextEvidenceToolStorePort,
    private readonly embeddingService: Pick<KloEmbeddingPort, 'computeEmbedding'>,
  ) {
    super();
  }

  get description(): string {
    return (
      'Search the internal context evidence index for the current ingest source. ' +
      'Use this to research indexed evidence before writing candidates or curating wiki knowledge.'
    );
  }

  get inputSchema() {
    return contextEvidenceSearchInputSchema;
  }

  /**
   * Runs the search and renders the hits as a numbered markdown list.
   *
   * The query embedding is best effort: when it cannot be computed a warning is logged and
   * the store is called with `queryEmbedding: null`.
   *
   * @returns The ingest-metadata failure, a `CONNECTION_REQUIRED` failure, or a success
   *   payload (possibly with zero results).
   */
  async call(
    input: ContextEvidenceSearchInput,
    context: ToolContext,
  ): Promise<ToolOutput<ContextEvidenceSearchStructured | ToolFailure>> {
    const ingest = resolveIngestMetadata(context);
    if (!ingest) {
      return ingestMetadataRequired();
    }

    // Resolve the connection before embedding so a rejected request costs no embedding call.
    // NOTE(review): an input-supplied connectionId/sourceKey takes precedence over the
    // context's values — confirm this override is intended for model-provided input.
    const connectionId = input.connectionId ?? context.connectionId ?? context.session?.connectionId;
    if (!connectionId) {
      return {
        markdown: 'Error: no connectionId is available for context evidence search.',
        structured: {
          success: false,
          error: 'CONNECTION_REQUIRED',
          message: 'Provide connectionId or run this inside an ingest session with a connectionId.',
        },
      };
    }

    const queryEmbedding = await this.computeQueryEmbedding(input.query);

    const results = await this.store.searchRRF({
      connectionId,
      sourceKey: input.sourceKey ?? ingest.sourceKey,
      queryEmbedding,
      queryText: input.query,
      limit: input.limit,
      includeDeleted: input.includeDeleted,
      currentRunId: ingest.runId,
    });

    if (results.length === 0) {
      return {
        markdown: `No context evidence found for "${input.query}".`,
        structured: { success: true, results: [], totalFound: 0 },
      };
    }

    return {
      markdown: [
        `Found ${results.length} evidence chunk(s):`,
        '',
        ...results.map((result, index) => {
          const reasonLine =
            result.matchReasons && result.matchReasons.length > 0
              ? ` matchReasons: ${result.matchReasons.join(', ')}\n`
              : '';
          return (
            `${index + 1}. **${result.title}** (${result.path})\n` +
            ` chunkId: ${result.chunkId}\n` +
            ` stableCitationKey: ${result.stableCitationKey}\n` +
            reasonLine +
            ` snippet: ${result.snippet}`
          );
        }),
      ].join('\n'),
      structured: {
        success: true,
        totalFound: results.length,
        // The spread already carries matchReasons/lanes when present; only the date needs
        // converting to an ISO string.
        results: results.map((result) => ({
          ...result,
          lastEditedAt: result.lastEditedAt?.toISOString() ?? null,
        })),
      },
    };
  }

  /**
   * Embeds the query text. A failure is logged as a warning and reported as `null` rather
   * than thrown.
   */
  private async computeQueryEmbedding(query: string): Promise<number[] | null> {
    try {
      return await this.embeddingService.computeEmbedding(query);
    } catch (error) {
      this.logger.warn(
        `Query embedding generation failed for context evidence search: ${
          error instanceof Error ? error.message : String(error)
        }`,
      );
      return null;
    }
  }
}
|
||||
145
packages/context/src/tools/context-evidence-tool-store.ts
Normal file
145
packages/context/src/tools/context-evidence-tool-store.ts
Normal file
|
|
@ -0,0 +1,145 @@
|
|||
import type { InsertContextCandidateInput } from '../ingest/context-candidates/index.js';
|
||||
import type { JsonValue } from '../ingest/ports.js';
|
||||
|
||||
/** Arguments for an evidence search against the store. */
export interface ContextEvidenceSearchArgs {
  connectionId: string;
  sourceKey?: string;
  /** Embedding of the query, or null when none is available. */
  queryEmbedding: number[] | null;
  /** Raw query text. */
  queryText: string;
  /** Maximum number of results requested. */
  limit: number;
  includeDeleted: boolean;
  currentRunId?: string;
}
|
||||
|
||||
/**
 * Why a chunk matched a search. The three literals are the named reasons;
 * the `string & {}` member keeps the type open to any other string while
 * preserving literal auto-completion in editors.
 */
export type ContextEvidenceSearchMatchReason =
  | 'lexical'
  | 'semantic'
  | 'token'
  | (string & {});
|
||||
|
||||
/**
 * Summary of one search lane, optionally attached to search results via
 * `ContextEvidenceSearchResult.lanes`.
 */
export interface ContextEvidenceSearchLaneSummary {
  /** Lane name. */
  lane: string;
  /** Outcome of the lane for this search. */
  status: 'available' | 'skipped' | 'failed';
  /** Candidate pool size that was asked for. */
  requestedCandidatePoolLimit: number;
  /** Candidate pool size that was actually used. */
  effectiveCandidatePoolLimit: number;
  /** Number of candidates the lane returned. */
  returnedCandidateCount: number;
  /** Weight associated with the lane. NOTE(review): presumably its ranking weight — confirm in the store. */
  weight: number;
  /** Optional free-form explanation. NOTE(review): presumably set for skipped/failed lanes — confirm. */
  reason?: string;
}
|
||||
|
||||
/**
 * One hit returned by `ContextEvidenceToolStorePort.searchRRF`.
 */
export interface ContextEvidenceSearchResult {
  /** Identifier of the matching chunk. */
  chunkId: string;
  /** Identifier of the document that owns the chunk. */
  documentId: string;
  /** Identifier of the document in the external source system. */
  externalId: string;
  /** Document title. */
  title: string;
  /** Document path. */
  path: string;
  /** Link to the document, or `null` when there is none. */
  url: string | null;
  /** Text excerpt shown for the hit. */
  snippet: string;
  /** Ranking score of the hit. */
  score: number;
  /** Citation payload stored with the chunk. */
  citation: JsonValue;
  /** Citation key stored with the chunk. */
  stableCitationKey: string;
  /** Sync identifier stored with the chunk. */
  syncId: string;
  /** Last edit time, or `null` when unknown. */
  lastEditedAt: Date | null;
  /** Optional list of reasons this chunk matched. */
  matchReasons?: ContextEvidenceSearchMatchReason[];
  /** Optional per-lane summaries for the search. */
  lanes?: ContextEvidenceSearchLaneSummary[];
}
|
||||
|
||||
/**
 * Document header returned by the read methods of `ContextEvidenceToolStorePort`.
 * Note that `external_id` is snake_case here, unlike the camelCase `externalId`
 * used by the search/neighbor result types in this file.
 */
export interface ContextEvidenceDocumentForRead {
  /** Document identifier. */
  id: string;
  /** Document title. */
  title: string;
  /** Document path. */
  path: string;
  /** Identifier of the document in the external source system. */
  external_id: string;
  /** Link to the document, or `null` when there is none. */
  url: string | null;
}
|
||||
|
||||
/**
 * Chunk payload returned by the read methods of `ContextEvidenceToolStorePort`.
 */
export interface ContextEvidenceChunkForRead {
  /** Chunk identifier. */
  id: string;
  /** Chunk text. */
  content: string;
  /** Citation payload for the chunk, when the store provides one. */
  citation?: JsonValue;
}
|
||||
|
||||
/**
 * A document together with its chunks, as returned by
 * `readDocumentById` and `readDocumentByExternalId`.
 */
export interface ContextEvidenceReadResult {
  /** Header of the document that was read. */
  document: ContextEvidenceDocumentForRead;
  /** Chunks belonging to the document. */
  chunks: ContextEvidenceChunkForRead[];
}
|
||||
|
||||
/**
 * A single chunk together with its owning document, as returned by `readChunkById`.
 */
export interface ContextEvidenceChunkReadResult {
  /** Header of the document that owns the chunk. */
  document: ContextEvidenceDocumentForRead;
  /** The chunk that was read. */
  chunk: ContextEvidenceChunkForRead;
}
|
||||
|
||||
/**
 * One related document returned by `ContextEvidenceToolStorePort.findNeighborDocuments`.
 */
export interface ContextEvidenceNeighborResult {
  /** Identifier of the neighboring document. */
  documentId: string;
  /** Identifier of the neighboring document in the external source system. */
  externalId: string;
  /** Title of the neighboring document. */
  title: string;
  /** Path of the neighboring document. */
  path: string;
  /** How the neighbor relates to the document that was queried. */
  relation: 'parent' | 'children' | 'linked' | 'backlinked' | 'same_path';
  /** Link to the neighboring document, or `null` when there is none. */
  url: string | null;
  /** Last edit time, or `null` when unknown. */
  lastEditedAt: Date | null;
}
|
||||
|
||||
/**
 * Chunk row returned by `ContextEvidenceToolStorePort.readChunksByIds`.
 * Unlike `ContextEvidenceChunkForRead`, it carries the owning document's
 * fields alongside the chunk's own content and citation data.
 */
export interface ContextEvidenceChunkForCandidate {
  /** Chunk identifier. */
  chunkId: string;
  /** Identifier of the document that owns the chunk. */
  documentId: string;
  /** Identifier of the document in the external source system. */
  externalId: string;
  /** Document title. */
  title: string;
  /** Document path. */
  path: string;
  /** Link to the document, or `null` when there is none. */
  url: string | null;
  /** Raw path recorded for the document. NOTE(review): presumably the raw file location — confirm. */
  rawPath: string;
  /** Full chunk text. */
  content: string;
  /** Citation payload stored with the chunk. */
  citation: JsonValue;
  /** Citation key stored with the chunk. */
  stableCitationKey: string;
  /** Sync identifier stored with the chunk. */
  syncId: string;
  /** Last edit time, or `null` when unknown. */
  lastEditedAt: Date | null;
}
|
||||
|
||||
/**
 * Row returned by `ContextEvidenceToolStorePort.insertCandidate`.
 * Field names are snake_case.
 */
export interface ContextCandidateInsertResult {
  /** Identifier of the stored candidate. */
  id: string;
  /** Key of the stored candidate. */
  candidate_key: string;
  /** Promotion score stored with the candidate. */
  promotion_score: number;
  /** Status stored with the candidate. */
  status: string;
}
|
||||
|
||||
/**
 * Row returned by `ContextEvidenceToolStorePort.updateCandidateStatus`.
 */
export interface ContextCandidateStatusResult {
  /** Key of the candidate that was updated. */
  candidate_key: string;
  /** Status of the candidate after the update. */
  status: string;
}
|
||||
|
||||
/**
 * Storage port used by the context evidence and context candidate tools.
 *
 * Argument order is not uniform across the read methods: `readChunkById`,
 * `readDocumentById` and `readChunksByIds` take the id(s) first, whereas
 * `readDocumentByExternalId` takes `connectionId` and `sourceKey` first.
 */
export interface ContextEvidenceToolStorePort {
  /**
   * Searches evidence chunks.
   * NOTE(review): "RRF" presumably stands for reciprocal rank fusion — confirm in the store.
   */
  searchRRF(args: ContextEvidenceSearchArgs): Promise<ContextEvidenceSearchResult[]>;

  /** Reads one chunk and its owning document; resolves to `null` when nothing is returned. */
  readChunkById(
    chunkId: string,
    connectionId: string,
    sourceKey: string,
    currentRunId?: string,
  ): Promise<ContextEvidenceChunkReadResult | null>;

  /** Reads a document and its chunks by internal id; resolves to `null` when nothing is returned. */
  readDocumentById(
    documentId: string,
    connectionId: string,
    sourceKey: string,
    currentRunId?: string,
  ): Promise<ContextEvidenceReadResult | null>;

  /** Reads a document and its chunks by external id; resolves to `null` when nothing is returned. */
  readDocumentByExternalId(
    connectionId: string,
    sourceKey: string,
    externalId: string,
    currentRunId?: string,
  ): Promise<ContextEvidenceReadResult | null>;

  /** Lists documents related to `documentId` through the requested relation, up to `limit`. */
  findNeighborDocuments(args: {
    connectionId: string;
    sourceKey: string;
    documentId: string;
    // Same union as the `relation` field reported on each returned neighbor.
    relation: ContextEvidenceNeighborResult['relation'];
    limit: number;
    currentRunId?: string;
  }): Promise<ContextEvidenceNeighborResult[]>;

  /** Reads several chunks at once, with the document fields needed to cite them. */
  readChunksByIds(
    chunkIds: string[],
    connectionId: string,
    sourceKey: string,
    currentRunId?: string,
  ): Promise<ContextEvidenceChunkForCandidate[]>;

  /** Stores a candidate and returns the stored row. */
  insertCandidate(input: InsertContextCandidateInput): Promise<ContextCandidateInsertResult>;

  /**
   * Sets the status of the candidate identified by `runId` + `candidateKey`;
   * resolves to `null` when nothing is returned for it.
   */
  updateCandidateStatus(args: {
    runId: string;
    candidateKey: string;
    status: 'pending' | 'promoted' | 'merged' | 'rejected' | 'conflict';
    rejectionReason: string | null;
  }): Promise<ContextCandidateStatusResult | null>;
}
|
||||
598
packages/context/src/tools/context-evidence-tools.test.ts
Normal file
598
packages/context/src/tools/context-evidence-tools.test.ts
Normal file
|
|
@ -0,0 +1,598 @@
|
|||
import { createHash } from 'node:crypto';
|
||||
import { mkdtemp, rm } from 'node:fs/promises';
|
||||
import { tmpdir } from 'node:os';
|
||||
import { join } from 'node:path';
|
||||
import { afterEach, beforeEach, describe, expect, it, vi } from 'vitest';
|
||||
import type { KloEmbeddingPort } from '../core/index.js';
|
||||
import { SqliteContextEvidenceStore } from '../ingest/context-evidence/sqlite-context-evidence-store.js';
|
||||
import { ContextCandidateMarkTool } from './context-candidate-mark.tool.js';
|
||||
import { ContextCandidateWriteTool } from './context-candidate-write.tool.js';
|
||||
import { ContextEvidenceNeighborsTool } from './context-evidence-neighbors.tool.js';
|
||||
import { ContextEvidenceReadTool } from './context-evidence-read.tool.js';
|
||||
import { ContextEvidenceSearchTool } from './context-evidence-search.tool.js';
|
||||
import type { ContextEvidenceToolStorePort } from './context-evidence-tool-store.js';
|
||||
import { createTouchedSlSources, type ToolContext, type ToolSession } from './index.js';
|
||||
|
||||
/**
 * Builds a fresh ToolContext that looks like an ingest session: the same
 * ingest metadata is present both on the context and on its session.
 */
const ingestContext = (): ToolContext => {
  const connectionId = '00000000-0000-0000-0000-000000000001';
  const runId = '10000000-0000-0000-0000-000000000001';

  // Only the fields listed here are populated, hence the cast through `unknown`.
  const session = {
    connectionId,
    isWorktreeScoped: true,
    preHead: 'abc123',
    touchedSlSources: createTouchedSlSources(),
    actions: [],
    ingest: { runId, jobId: 'job-1', syncId: 'sync-1', sourceKey: 'notion' },
  } as unknown as ToolSession;

  return {
    sourceId: 'ingest',
    messageId: 'job-1-wu-unit-1',
    userId: 'system',
    connectionId,
    // Separate object from `session.ingest` so the two never alias.
    ingest: { runId, jobId: 'job-1', syncId: 'sync-1', sourceKey: 'notion' },
    session,
  };
};
|
||||
|
||||
/**
 * Creates a stub embedding port whose `computeEmbedding` resolves to a fixed
 * three-element vector; any member passed in `overrides` replaces the default.
 */
const makeEmbeddingService = (overrides: Partial<KloEmbeddingPort> = {}) => {
  const stub = {
    computeEmbedding: vi.fn().mockResolvedValue([0.25, 0.5, 0.75]),
    ...overrides,
  } as Partial<KloEmbeddingPort>;

  return stub as KloEmbeddingPort;
};
|
||||
|
||||
// Unit tests for the evidence/candidate tools against mocked store and embedding ports.
describe('context evidence tools', () => {
  // These mirror the identifiers produced by ingestContext().
  const CONNECTION_ID = '00000000-0000-0000-0000-000000000001';
  const RUN_ID = '10000000-0000-0000-0000-000000000001';
  const EVIDENCE_CHUNK_ID = '00000000-0000-0000-0000-000000000101';
  const REVENUE_RULE = 'Booked revenue excludes refunds and test accounts.';

  /** Chunk row handed back by the mocked `readChunksByIds` in the candidate-write tests. */
  const revenueChunk = () => ({
    chunkId: EVIDENCE_CHUNK_ID,
    documentId: 'doc-1',
    externalId: 'page-1',
    title: 'Revenue Recognition',
    path: 'Company Handbook / Finance / Revenue Recognition',
    url: 'https://notion.example/page-1',
    rawPath: 'pages/page-1/page.md',
    content: REVENUE_RULE,
    citation: { source: 'notion', pageId: 'page-1', rawPath: 'pages/page-1/page.md' },
    stableCitationKey: 'notion:page-1:policy:abc',
    syncId: 'sync-1',
    lastEditedAt: new Date('2026-04-12T10:15:00.000Z'),
  });

  /** Row handed back by the mocked `insertCandidate` in the candidate-write tests. */
  const storedCandidate = () => ({
    id: 'candidate-1',
    candidate_key: 'revenue-definition',
    promotion_score: 10,
    status: 'pending',
  });

  /** Tool input shared by the candidate-write tests that cite `revenueChunk()`. */
  const revenueCandidateInput = () => ({
    candidateKey: 'revenue-definition',
    topic: 'Revenue Recognition',
    assertion: REVENUE_RULE,
    rationale: 'Finance handbook is the source of truth and describes the reusable revenue rule.',
    evidenceChunkIds: [EVIDENCE_CHUNK_ID],
    suggestedPageKey: 'revenue-definition',
    actionHint: 'create' as const,
    durabilityScore: 3,
    authorityScore: 3,
    reuseScore: 3,
    noveltyScore: 2,
    riskScore: 1,
  });

  it('searches context evidence with ingest defaults', async () => {
    const queryVector = () => [0.1, ...Array.from({ length: 383 }, () => 0)];
    const availableLane = (lane: string, weight: number) => ({
      lane,
      status: 'available',
      requestedCandidatePoolLimit: 25,
      effectiveCandidatePoolLimit: 25,
      returnedCandidateCount: 1,
      weight,
    });

    const searchRRF = vi.fn().mockResolvedValue([
      {
        chunkId: 'chunk-1',
        documentId: 'doc-1',
        externalId: 'page-1',
        title: 'Revenue Recognition',
        path: 'Company Handbook / Finance / Revenue Recognition',
        url: 'https://notion.example/page-1',
        snippet: 'Booked revenue excludes refunds and test accounts.',
        score: 0.35,
        citation: { source: 'notion', pageId: 'page-1', rawPath: 'pages/page-1/page.md' },
        stableCitationKey: 'notion:page-1:policy:abc',
        syncId: 'sync-1',
        lastEditedAt: new Date('2026-04-12T10:15:00.000Z'),
        matchReasons: ['lexical', 'semantic'],
        lanes: [availableLane('lexical', 1.5), availableLane('semantic', 2)],
      },
    ]);
    const store = { searchRRF } as Partial<ContextEvidenceToolStorePort> as ContextEvidenceToolStorePort;
    const embeddings = {
      computeEmbedding: vi.fn().mockResolvedValue(queryVector()),
    } as Partial<KloEmbeddingPort> as KloEmbeddingPort;

    const tool = new ContextEvidenceSearchTool(store, embeddings);
    const result = await tool.call({ query: 'revenue refunds', limit: 5, includeDeleted: false }, ingestContext());

    // sourceKey and currentRunId are not part of the input; they come from the ingest metadata.
    expect(searchRRF).toHaveBeenCalledWith({
      connectionId: CONNECTION_ID,
      sourceKey: 'notion',
      queryEmbedding: queryVector(),
      queryText: 'revenue refunds',
      limit: 5,
      includeDeleted: false,
      currentRunId: RUN_ID,
    });
    expect(result.markdown).toContain('Revenue Recognition');
    expect(result.markdown).toContain('matchReasons: lexical, semantic');
    expect(result.structured.success).toBe(true);
    // The guard narrows the structured union so `results` is accessible.
    if (result.structured.success) {
      expect(result.structured.results[0]).toMatchObject({
        chunkId: 'chunk-1',
        stableCitationKey: 'notion:page-1:policy:abc',
        matchReasons: ['lexical', 'semantic'],
        lanes: expect.arrayContaining([expect.objectContaining({ lane: 'lexical', status: 'available' })]),
      });
    }
  });

  it('returns a structured ingest metadata error outside ingest sessions', async () => {
    const store = { searchRRF: vi.fn() } as Partial<ContextEvidenceToolStorePort> as ContextEvidenceToolStorePort;
    const embeddings = { computeEmbedding: vi.fn() } as Partial<KloEmbeddingPort> as KloEmbeddingPort;
    const tool = new ContextEvidenceSearchTool(store, embeddings);

    // This context carries no ingest metadata, neither directly nor through a session.
    const nonIngestContext = { sourceId: 'research', messageId: 'm1', userId: 'user-1' };
    const result = await tool.call({ query: 'revenue', limit: 5, includeDeleted: false }, nonIngestContext);

    expect(result.structured).toMatchObject({ success: false, error: 'INGEST_METADATA_REQUIRED' });
  });

  it('reads a full document by external id', async () => {
    const readDocumentByExternalId = vi.fn().mockResolvedValue({
      document: {
        id: 'doc-1',
        title: 'Onboarding SOP',
        path: 'Ops / Onboarding SOP',
        external_id: 'page-ops',
        raw_path: 'pages/page-ops/page.md',
        url: 'https://notion.example/page-ops',
      },
      chunks: [
        {
          id: 'chunk-1',
          heading_path: ['Onboarding SOP', 'Checklist'],
          content: 'Create account, invite to workspace, confirm dashboard access.',
          citation: { source: 'notion', pageId: 'page-ops' },
        },
      ],
    });
    const store = { readDocumentByExternalId } as Partial<ContextEvidenceToolStorePort> as ContextEvidenceToolStorePort;

    const tool = new ContextEvidenceReadTool(store);
    const result = await tool.call({ externalId: 'page-ops', includeNeighborChunks: false }, ingestContext());

    expect(readDocumentByExternalId).toHaveBeenCalledWith(CONNECTION_ID, 'notion', 'page-ops', RUN_ID);
    expect(result.markdown).toContain('## Onboarding SOP');
    expect(result.markdown).toContain('Create account');
    expect(result.structured.success).toBe(true);
    if (result.structured.success) {
      expect(result.structured.found).toBe(true);
    }
  });

  it('reads documents and chunks by id with connection and source scope', async () => {
    const documentId = '00000000-0000-0000-0000-000000000201';
    const chunkId = '00000000-0000-0000-0000-000000000301';
    const scopedDocument = () => ({
      id: documentId,
      title: 'Scoped Document',
      path: 'Scoped Document',
      external_id: 'page-scoped',
      url: null,
    });

    const readDocumentById = vi.fn().mockResolvedValue({
      document: scopedDocument(),
      chunks: [{ id: 'chunk-1', content: 'Scoped content.' }],
    });
    const readChunkById = vi.fn().mockResolvedValue({
      document: scopedDocument(),
      chunk: {
        id: chunkId,
        content: 'Scoped chunk.',
        citation: { source: 'notion' },
      },
    });
    const store = {
      readDocumentById,
      readChunkById,
    } as Partial<ContextEvidenceToolStorePort> as ContextEvidenceToolStorePort;

    const tool = new ContextEvidenceReadTool(store);
    await tool.call({ documentId, includeNeighborChunks: false }, ingestContext());
    await tool.call({ chunkId, includeNeighborChunks: false }, ingestContext());

    expect(readDocumentById).toHaveBeenCalledWith(documentId, CONNECTION_ID, 'notion', RUN_ID);
    expect(readChunkById).toHaveBeenCalledWith(chunkId, CONNECTION_ID, 'notion', RUN_ID);
  });

  it('lists evidence neighbors', async () => {
    const findNeighborDocuments = vi.fn().mockResolvedValue([
      {
        documentId: 'doc-child',
        externalId: 'page-child',
        title: 'Revenue Caveats',
        path: 'Company Handbook / Finance / Revenue Caveats',
        relation: 'children',
        url: null,
        lastEditedAt: null,
      },
    ]);
    const store = { findNeighborDocuments } as Partial<ContextEvidenceToolStorePort> as ContextEvidenceToolStorePort;

    const tool = new ContextEvidenceNeighborsTool(store);
    const result = await tool.call({ documentId: 'doc-1', relation: 'children', limit: 10 }, ingestContext());

    expect(findNeighborDocuments).toHaveBeenCalledWith({
      connectionId: CONNECTION_ID,
      sourceKey: 'notion',
      documentId: 'doc-1',
      relation: 'children',
      limit: 10,
      currentRunId: RUN_ID,
    });
    expect(result.markdown).toContain('Revenue Caveats');
  });

  it('writes a cited candidate with durable evidence refs', async () => {
    const readChunksByIds = vi.fn().mockResolvedValue([revenueChunk()]);
    const insertCandidate = vi.fn().mockResolvedValue(storedCandidate());
    const store = {
      readChunksByIds,
      insertCandidate,
    } as Partial<ContextEvidenceToolStorePort> as ContextEvidenceToolStorePort;

    const embeddings = makeEmbeddingService();
    const tool = new ContextCandidateWriteTool(store, embeddings);
    const result = await tool.call(revenueCandidateInput(), ingestContext());

    expect(readChunksByIds).toHaveBeenCalledWith([EVIDENCE_CHUNK_ID], CONNECTION_ID, 'notion', RUN_ID);

    // The evidence ref carries a SHA-256 hex digest of the cited chunk content.
    const expectedSnippetHash = createHash('sha256').update(REVENUE_RULE).digest('hex');
    expect(insertCandidate).toHaveBeenCalledWith(
      expect.objectContaining({
        runId: RUN_ID,
        connectionId: CONNECTION_ID,
        sourceKey: 'notion',
        candidateKey: 'revenue-definition',
        promotionScore: 10,
        status: 'pending',
        evidenceRefs: [
          expect.objectContaining({
            chunkId: EVIDENCE_CHUNK_ID,
            stableCitationKey: 'notion:page-1:policy:abc',
            snippetHash: expectedSnippetHash,
          }),
        ],
      }),
    );
    expect(embeddings.computeEmbedding).toHaveBeenCalledWith(
      'Revenue Recognition - Booked revenue excludes refunds and test accounts.',
    );
    expect(insertCandidate).toHaveBeenCalledWith(
      expect.objectContaining({
        embedding: [0.25, 0.5, 0.75],
      }),
    );
    expect(result.structured).toMatchObject({ success: true, candidateKey: 'revenue-definition', promotionScore: 10 });
  });

  it('saves candidate writes with a null embedding when embedding generation fails', async () => {
    const insertCandidate = vi.fn().mockResolvedValue(storedCandidate());
    const store = {
      readChunksByIds: vi.fn().mockResolvedValue([revenueChunk()]),
      insertCandidate,
    } as Partial<ContextEvidenceToolStorePort> as ContextEvidenceToolStorePort;
    const embeddings = makeEmbeddingService({
      computeEmbedding: vi.fn().mockRejectedValue(new Error('embedding provider unavailable')),
    });

    const tool = new ContextCandidateWriteTool(store, embeddings);
    const result = await tool.call(revenueCandidateInput(), ingestContext());

    expect(embeddings.computeEmbedding).toHaveBeenCalledWith(
      'Revenue Recognition - Booked revenue excludes refunds and test accounts.',
    );
    // The rejected embedding call must not abort the write.
    expect(insertCandidate).toHaveBeenCalledWith(
      expect.objectContaining({
        embedding: null,
      }),
    );
    expect(result.structured).toMatchObject({ success: true, candidateKey: 'revenue-definition', promotionScore: 10 });
  });

  it('rejects candidate writes without evidence chunks', async () => {
    const embeddings = makeEmbeddingService();
    const store = {
      insertCandidate: vi.fn(),
    } as Partial<ContextEvidenceToolStorePort> as ContextEvidenceToolStorePort;
    const tool = new ContextCandidateWriteTool(store, embeddings);

    const result = await tool.call(
      {
        candidateKey: 'uncited',
        topic: 'Uncited',
        assertion: 'This has no evidence.',
        rationale: 'No evidence was provided.',
        evidenceChunkIds: [],
        actionHint: 'create',
        durabilityScore: 1,
        authorityScore: 1,
        reuseScore: 1,
        noveltyScore: 1,
        riskScore: 1,
      },
      ingestContext(),
    );

    expect(result.structured).toMatchObject({ success: false, error: 'EVIDENCE_REQUIRED' });
    expect(embeddings.computeEmbedding).not.toHaveBeenCalled();
  });

  it('marks a candidate status during reconciliation', async () => {
    const updateCandidateStatus = vi.fn().mockResolvedValue({
      id: 'candidate-1',
      candidate_key: 'revenue-definition',
      status: 'promoted',
    });
    const store = { updateCandidateStatus } as Partial<ContextEvidenceToolStorePort> as ContextEvidenceToolStorePort;

    const tool = new ContextCandidateMarkTool(store);
    const result = await tool.call(
      { candidateKey: 'revenue-definition', status: 'promoted', rejectionReason: null },
      ingestContext(),
    );

    expect(updateCandidateStatus).toHaveBeenCalledWith({
      runId: RUN_ID,
      candidateKey: 'revenue-definition',
      status: 'promoted',
      rejectionReason: null,
    });
    expect(result.structured).toMatchObject({ success: true, candidateKey: 'revenue-definition', status: 'promoted' });
  });
});
|
||||
|
||||
// Tests that run the tools against a SqliteContextEvidenceStore backed by a
// database file inside a per-test temporary directory.
describe('context evidence tools against real SqliteContextEvidenceStore', () => {
  let tempDir: string;
  let dbPath: string;

  beforeEach(async () => {
    const scratchPrefix = join(tmpdir(), 'klo-context-tools-sqlite-');
    tempDir = await mkdtemp(scratchPrefix);
    dbPath = join(tempDir, '.klo', 'db.sqlite');
  });

  afterEach(async () => {
    await rm(tempDir, { recursive: true, force: true });
  });

  /** Ingest-scoped ToolContext whose identifiers match the rows written by seedChunk(). */
  function realStoreContext(): ToolContext {
    const session = {
      connectionId: 'conn-1',
      isWorktreeScoped: true,
      preHead: 'abc123',
      touchedSlSources: createTouchedSlSources(),
      actions: [],
      ingest: { runId: 'run-1', jobId: 'job-1', syncId: 'sync-1', sourceKey: 'notion' },
    } as unknown as ToolSession;

    return {
      sourceId: 'ingest',
      messageId: 'job-1-wu-unit-1',
      userId: 'system',
      connectionId: 'conn-1',
      ingest: { runId: 'run-1', jobId: 'job-1', syncId: 'sync-1', sourceKey: 'notion' },
      session,
    };
  }

  /**
   * Writes one document with a single chunk into `store`, reads the document
   * back, and returns the id of its first chunk.
   */
  async function seedChunk(store: SqliteContextEvidenceStore): Promise<string> {
    const seededDocument = await store.upsertDocument({
      runId: 'run-1',
      connectionId: 'conn-1',
      sourceKey: 'notion',
      externalId: 'page-1',
      externalParentId: null,
      databaseId: null,
      dataSourceId: null,
      title: 'Revenue Recognition',
      path: 'Company Handbook / Finance / Revenue Recognition',
      url: 'https://notion.test/page-1',
      objectType: 'page',
      lastEditedAt: new Date('2026-04-30T10:00:00.000Z'),
      lastEditedBy: 'user-1',
      rawPath: 'pages/page-1/page.md',
      syncId: 'sync-1',
      contentHash: 'hash-page-1',
      publishState: 'published',
      metadata: {},
    });

    await store.replaceChunks(seededDocument.id, [
      {
        chunkKey: 'intro',
        headingPath: ['Revenue'],
        ordinal: 0,
        content: 'Booked revenue excludes refunds and test accounts.',
        searchText: 'booked revenue excludes refunds test accounts',
        embedding: [1, 0, 0],
        tokenCount: 8,
        citation: { source: 'notion', pageId: 'page-1', rawPath: 'pages/page-1/page.md' },
        stableCitationKey: 'notion:page-1:intro',
        syncId: 'sync-1',
        contentHash: 'chunk-page-1',
      },
    ]);

    // Read back through the store so the id returned is the one the store itself reports.
    const roundTrip = await store.readDocumentByExternalId('conn-1', 'notion', 'page-1', 'run-1');
    if (!roundTrip) {
      throw new Error('seeded chunk not readable');
    }
    return roundTrip.chunks[0].id;
  }

  /** Unparsed candidate-write input; only the rationale and the cited chunk ids vary between tests. */
  const candidateInput = (rationale: string, evidenceChunkIds: string[]) => ({
    candidateKey: 'revenue-definition',
    topic: 'Revenue Recognition',
    assertion: 'Booked revenue excludes refunds and test accounts.',
    rationale,
    evidenceChunkIds,
    actionHint: 'create',
    durabilityScore: 3,
    authorityScore: 3,
    reuseScore: 3,
    noveltyScore: 2,
    riskScore: 1,
  });

  it('candidate write accepts the prefixed chunkId returned by the real store and persists', async () => {
    const store = new SqliteContextEvidenceStore({ dbPath });
    const chunkId = await seedChunk(store);
    expect(chunkId).toMatch(/^ctxchunk-[0-9a-f-]{36}$/);

    const embeddings = {
      computeEmbedding: vi.fn().mockResolvedValue([0.1, 0.2, 0.3]),
    } as Partial<KloEmbeddingPort> as KloEmbeddingPort;
    const tool = new ContextCandidateWriteTool(store, embeddings);

    const parsed = tool.parseInput(candidateInput('The Finance handbook is the source of truth.', [chunkId]));
    const result = await tool.call(parsed, realStoreContext());

    expect(result.structured).toMatchObject({
      success: true,
      candidateKey: 'revenue-definition',
      promotionScore: 10,
      status: 'pending',
    });
  });

  it('candidate write schema rejects a bare UUID without the ctxchunk- prefix', () => {
    const tool = new ContextCandidateWriteTool(
      {} as ContextEvidenceToolStorePort,
      { computeEmbedding: vi.fn() } as Partial<KloEmbeddingPort> as KloEmbeddingPort,
    );
    const bareUuid = '00000000-0000-0000-0000-000000000101';

    expect(() =>
      tool.parseInput(candidateInput('Finance handbook is the source of truth.', [bareUuid])),
    ).toThrow(/ctxchunk/);
  });

  it('evidence read schema rejects bare UUIDs for chunkId and documentId', () => {
    const tool = new ContextEvidenceReadTool({} as ContextEvidenceToolStorePort);

    const parseWithBareChunkId = () =>
      tool.parseInput({ chunkId: '00000000-0000-0000-0000-000000000301', includeNeighborChunks: false });
    const parseWithBareDocumentId = () =>
      tool.parseInput({ documentId: '00000000-0000-0000-0000-000000000201', includeNeighborChunks: false });

    expect(parseWithBareChunkId).toThrow(/ctxchunk/);
    expect(parseWithBareDocumentId).toThrow(/ctxdoc/);
  });

  it('evidence neighbors schema rejects bare UUIDs for documentId', () => {
    const tool = new ContextEvidenceNeighborsTool({} as ContextEvidenceToolStorePort);
    const parseWithBareDocumentId = () =>
      tool.parseInput({ documentId: '00000000-0000-0000-0000-000000000201', relation: 'children', limit: 10 });

    expect(parseWithBareDocumentId).toThrow(/ctxdoc/);
  });
});
|
||||
23
packages/context/src/tools/context-ingest-metadata.ts
Normal file
23
packages/context/src/tools/context-ingest-metadata.ts
Normal file
|
|
@ -0,0 +1,23 @@
|
|||
import type { ToolContext, ToolOutput } from './base-tool.js';
|
||||
import type { IngestToolMetadata } from './tool-session.js';
|
||||
|
||||
/**
 * Structured payload describing a failed tool call.
 */
export interface ToolFailure {
  /** Always `false`; lets callers discriminate failures from successes. */
  success: false;
  /** Machine-readable error code, e.g. `'INGEST_METADATA_REQUIRED'`. */
  error: string;
  /** Human-readable description of the failure. */
  message: string;
}
|
||||
|
||||
/**
 * Looks up the ingest metadata for a tool call.
 *
 * Metadata on the session takes precedence; the value set directly on the
 * context is used only when the session has none.
 *
 * @param context - Context of the current tool call.
 * @returns The ingest metadata, or `null` when neither location provides it.
 */
export function resolveIngestMetadata(context: ToolContext): IngestToolMetadata | null {
  const sessionScoped = context.session?.ingest;
  if (sessionScoped != null) {
    return sessionScoped;
  }

  const contextScoped = context.ingest;
  return contextScoped ?? null;
}
|
||||
|
||||
/**
 * Builds the standard failure output for a tool that was invoked without
 * ingest metadata.
 *
 * The structured payload is a plain `ToolFailure` asserted to `T`; it carries
 * only the three `ToolFailure` fields, so a caller that picks a `T` with
 * additional required members will not find them on the returned object.
 *
 * @returns A new output object with error code `INGEST_METADATA_REQUIRED`.
 */
export function ingestMetadataRequired<T extends ToolFailure = ToolFailure>(): ToolOutput<T> {
  const failure: ToolFailure = {
    success: false,
    error: 'INGEST_METADATA_REQUIRED',
    message: 'This tool requires ingest metadata on ToolContext or ToolSession.',
  };

  return {
    markdown: 'Error: this tool is only available inside an ingest WorkUnit or ingest reconciliation session.',
    structured: failure as T,
  };
}
|
||||
43
packages/context/src/tools/index.ts
Normal file
43
packages/context/src/tools/index.ts
Normal file
|
|
@ -0,0 +1,43 @@
|
|||
export type { GitAuthor, GitAuthorResolverPort } from './authors.js';
|
||||
export { SYSTEM_GIT_AUTHOR } from './authors.js';
|
||||
export type {
|
||||
MethodologyEntry,
|
||||
ToolContext,
|
||||
ToolOutput,
|
||||
ToolProgressRelayPort,
|
||||
ToolTimingTrackerPort,
|
||||
} from './base-tool.js';
|
||||
export { BaseTool } from './base-tool.js';
|
||||
export { ContextCandidateMarkTool } from './context-candidate-mark.tool.js';
|
||||
export { ContextCandidateWriteTool } from './context-candidate-write.tool.js';
|
||||
export { ContextEvidenceNeighborsTool } from './context-evidence-neighbors.tool.js';
|
||||
export { ContextEvidenceReadTool } from './context-evidence-read.tool.js';
|
||||
export { ContextEvidenceSearchTool } from './context-evidence-search.tool.js';
|
||||
export type {
|
||||
ContextCandidateInsertResult,
|
||||
ContextCandidateStatusResult,
|
||||
ContextEvidenceChunkForCandidate,
|
||||
ContextEvidenceChunkForRead,
|
||||
ContextEvidenceChunkReadResult,
|
||||
ContextEvidenceDocumentForRead,
|
||||
ContextEvidenceNeighborResult,
|
||||
ContextEvidenceReadResult,
|
||||
ContextEvidenceSearchArgs,
|
||||
ContextEvidenceSearchResult,
|
||||
ContextEvidenceToolStorePort,
|
||||
} from './context-evidence-tool-store.js';
|
||||
export type { ToolFailure } from './context-ingest-metadata.js';
|
||||
export { ingestMetadataRequired, resolveIngestMetadata } from './context-ingest-metadata.js';
|
||||
export type { SqlEdit } from './sql-edit-replacer.js';
|
||||
export { applySqlEdits } from './sql-edit-replacer.js';
|
||||
export type { IngestToolMetadata, MemoryAction, ToolSession } from './tool-session.js';
|
||||
export type { TouchedSlSource, TouchedSlSourceSet } from './touched-sl-sources.js';
|
||||
export {
|
||||
addTouchedSlSource,
|
||||
createTouchedSlSources,
|
||||
deleteTouchedSlSource,
|
||||
hasTouchedSlSource,
|
||||
listTouchedSlSources,
|
||||
touchedSlSourceCount,
|
||||
touchedSlSourceNamesForConnection,
|
||||
} from './touched-sl-sources.js';
|
||||
229
packages/context/src/tools/sql-edit-replacer.ts
Normal file
229
packages/context/src/tools/sql-edit-replacer.ts
Normal file
|
|
@ -0,0 +1,229 @@
|
|||
/**
 * A single text replacement to apply to a SQL string.
 */
export interface SqlEdit {
  /** Text to locate in the SQL. */
  oldText: string;
  /** Text that takes the place of `oldText`. */
  newText: string;
  /** Optional free-form explanation of the edit. */
  reason?: string;
}
|
||||
|
||||
/**
 * Outcome of applying a list of edits to a SQL string.
 * NOTE(review): the producer of this type is outside the visible part of the
 * file; the field descriptions below follow the names only — confirm there.
 */
interface SqlEditResult {
  /** Whether the edits were applied successfully. */
  success: boolean;
  /** The resulting SQL text. */
  sql: string;
  /** Number of edits that were applied. */
  appliedEdits: number;
  /** Error messages collected while applying the edits. */
  errors: string[];
}
|
||||
|
||||
/**
 * Result of one replacement strategy:
 * - `{ sql, note? }`: the strategy applied the edit and `sql` is the new text;
 * - `{ error }`: the strategy found the text but refuses to apply the edit;
 * - `null`: the strategy does not apply to this input.
 */
type ReplacerResult =
  | { sql: string; note?: string }
  | { error: string }
  | null;
|
||||
|
||||
/**
 * Replaces `oldText` with `newText` when `oldText` occurs exactly once in `sql`.
 *
 * Occurrences are counted with overlap (the scan resumes one character after
 * each hit), so `'aa'` is found twice in `'aaa'`.
 *
 * @param sql - Text to edit.
 * @param oldText - Literal text to look for.
 * @param newText - Replacement, inserted verbatim.
 * @returns The edited SQL for a unique match, an `error` when there are
 *   several matches, or `null` when `oldText` is empty or not found.
 */
function exactReplacer(sql: string, oldText: string, newText: string): ReplacerResult {
  if (oldText === '') {
    return null;
  }

  let occurrences = 0;
  let matchStart = -1;
  for (let at = sql.indexOf(oldText, 0); at !== -1; at = sql.indexOf(oldText, at + 1)) {
    occurrences += 1;
    matchStart = at;
  }

  if (occurrences === 0) {
    return null;
  }
  if (occurrences > 1) {
    return { error: `Found ${occurrences} matches for text, expected 1. Add more surrounding context.` };
  }

  const before = sql.slice(0, matchStart);
  const after = sql.slice(matchStart + oldText.length);
  return { sql: before + newText + after };
}
|
||||
|
||||
/**
 * Maps positions in the whitespace-collapsed form of `original` back to
 * positions in `original` itself.
 *
 * Entry `k` is the index in `original` of the character that lands at index
 * `k` once every run of whitespace is collapsed to a single character: each
 * non-whitespace character contributes its own index, and each whitespace run
 * contributes only the index of its first character.
 *
 * @param original - Text before whitespace collapsing.
 * @returns One original index per character of the collapsed text.
 */
function buildCharacterMap(original: string): number[] {
  const offsets: number[] = [];
  let insideWhitespaceRun = false;

  for (let pos = 0; pos < original.length; pos++) {
    const isWhitespace = /\s/.test(original.charAt(pos));
    // Skip every whitespace character except the one that opens a run.
    if (!(isWhitespace && insideWhitespaceRun)) {
      offsets.push(pos);
    }
    insideWhitespaceRun = isWhitespace;
  }

  return offsets;
}
|
||||
|
||||
/**
 * Replaces `oldText` with `newText` while ignoring differences in whitespace:
 * both inputs are compared after every run of whitespace is collapsed to a
 * single space, and the matched span is then cut out of the original `sql`.
 *
 * @param sql - Text to edit.
 * @param oldText - Text to find, compared whitespace-insensitively.
 * @param newText - Replacement text, inserted verbatim.
 * @returns The rewritten SQL when the collapsed text matches exactly once;
 *   `null` when `oldText` collapses to nothing, or matches zero or several times.
 */
function whitespaceNormalizedReplacer(sql: string, oldText: string, newText: string): ReplacerResult {
  const collapse = (text: string): string => text.replace(/\s+/g, ' ');
  const haystack = collapse(sql);
  const needle = collapse(oldText);

  if (needle === '') {
    return null;
  }

  const first = haystack.indexOf(needle);
  if (first === -1) {
    return null;
  }
  // A second hit (overlapping ones included) makes the edit ambiguous.
  if (haystack.indexOf(needle, first + 1) !== -1) {
    return null;
  }

  // Translate the collapsed-text span back to offsets in the original SQL.
  const offsets = buildCharacterMap(sql);
  const collapsedEnd = first + needle.length;
  const from = offsets[first];
  const to = collapsedEnd < offsets.length ? offsets[collapsedEnd] : sql.length;

  return { sql: sql.slice(0, from) + newText + sql.slice(to) };
}
|
||||
|
||||
/**
 * Computes the Levenshtein edit distance (insertions, deletions and
 * substitutions, each costing 1) between two strings, compared per UTF-16
 * code unit.
 *
 * Only two rows of the dynamic-programming table are kept, sized by the shorter
 * input, so memory is O(min(a.length, b.length)) instead of a full matrix per
 * call.
 *
 * @param a - First string.
 * @param b - Second string.
 * @returns The minimum number of single-character edits turning `a` into `b`.
 */
function levenshteinDistance(a: string, b: string): number {
  if (a.length === 0) {
    return b.length;
  }
  if (b.length === 0) {
    return a.length;
  }

  // The distance is symmetric, so the shorter string can define the row width.
  const [longer, shorter] = a.length >= b.length ? [a, b] : [b, a];
  const width = shorter.length;

  let previous: number[] = Array.from({ length: width + 1 }, (_, j) => j);
  let current: number[] = new Array<number>(width + 1).fill(0);

  for (let i = 1; i <= longer.length; i++) {
    current[0] = i;
    const code = longer.charCodeAt(i - 1);
    for (let j = 1; j <= width; j++) {
      const cost = code === shorter.charCodeAt(j - 1) ? 0 : 1;
      current[j] = Math.min(previous[j] + 1, current[j - 1] + 1, previous[j - 1] + cost);
    }
    // Reuse the two buffers instead of allocating a new row per iteration.
    [previous, current] = [current, previous];
  }

  return previous[width];
}
|
||||
|
||||
/**
 * Replaces the substring of `sql` that is closest to `oldText` by edit distance.
 *
 * Every window whose length is within 85%–115% of `oldText.length` is scored;
 * the first window with the lowest distance wins (shorter windows and earlier
 * starts are visited first). The replacement is made only when that window is
 * at least 85% similar to `oldText`.
 *
 * @param sql - Text to edit.
 * @param oldText - Approximate text to find.
 * @param newText - Replacement text.
 * @returns The rewritten SQL plus a note describing the fuzzy match, or `null`
 *   when `oldText` is empty, no window fits, or the best window is too dissimilar.
 */
function fuzzyReplacer(sql: string, oldText: string, newText: string): ReplacerResult {
  const wanted = oldText.length;
  if (wanted === 0) {
    return null;
  }

  const shortest = Math.max(1, Math.floor(wanted * 0.85));
  // Windows longer than the SQL itself can never be cut out of it.
  const longest = Math.min(Math.ceil(wanted * 1.15), sql.length);

  let best: { distance: number; start: number; end: number } | null = null;

  for (let size = shortest; size <= longest; size++) {
    const lastStart = sql.length - size;
    for (let start = 0; start <= lastStart; start++) {
      const end = start + size;
      const distance = levenshteinDistance(sql.slice(start, end), oldText);
      // Strict comparison keeps the earliest window on ties.
      if (best === null || distance < best.distance) {
        best = { distance, start, end };
      }
    }
  }

  if (best === null) {
    return null;
  }

  const longerLength = Math.max(wanted, best.end - best.start);
  const similarity = 1 - best.distance / longerLength;
  if (similarity < 0.85) {
    return null;
  }

  const matchedText = sql.slice(best.start, best.end);
  const head = sql.slice(0, best.start);
  const tail = sql.slice(best.end);
  return {
    sql: head + newText + tail,
    note: `Fuzzy match used (similarity: ${(similarity * 100).toFixed(1)}%, matched: "${matchedText}")`,
  };
}
|
||||
|
||||
/**
 * Options accepted by `applySqlEdits`.
 */
interface ApplySqlEditsOptions {
  /** When truthy, only exact matching is attempted; the whitespace-normalized and fuzzy fallbacks are skipped. */
  exactOnly?: boolean;
}
|
||||
|
||||
/**
 * Applies a list of edits to `sql` in order, each edit operating on the result
 * of the previous ones.
 *
 * For every edit the strategies are tried in sequence — exact match, then
 * (unless `options.exactOnly` is set) whitespace-normalized match, then fuzzy
 * match — and the first one that returns a non-null result decides the outcome.
 * A failed edit records an error and processing continues with the next edit.
 *
 * @param sql - Original SQL text.
 * @param edits - Edits to apply, in order.
 * @param options - Optional behavior switches.
 * @returns The final SQL, the number of edits applied, the collected error
 *   messages, and `success` set to true only when there were no errors.
 */
export function applySqlEdits(sql: string, edits: SqlEdit[], options?: ApplySqlEditsOptions): SqlEditResult {
  const strategies = options?.exactOnly
    ? [exactReplacer]
    : [exactReplacer, whitespaceNormalizedReplacer, fuzzyReplacer];

  const errors: string[] = [];
  let workingSql = sql;
  let appliedEdits = 0;

  for (const edit of edits) {
    const reasonSuffix = edit.reason ? ` (reason: ${edit.reason})` : '';

    // The first strategy that yields anything other than null settles this edit.
    let outcome: ReplacerResult = null;
    for (const strategy of strategies) {
      outcome = strategy(workingSql, edit.oldText, edit.newText);
      if (outcome !== null) {
        break;
      }
    }

    if (outcome === null) {
      const preview = `${edit.oldText.slice(0, 80)}${edit.oldText.length > 80 ? '...' : ''}`;
      errors.push(`No match found for edit${reasonSuffix}: "${preview}"`);
    } else if ('error' in outcome) {
      errors.push(`${outcome.error}${reasonSuffix}`);
    } else {
      workingSql = outcome.sql;
      appliedEdits += 1;
    }
  }

  return {
    success: errors.length === 0,
    sql: workingSql,
    appliedEdits,
    errors,
  };
}
|
||||
54
packages/context/src/tools/tool-session.ts
Normal file
54
packages/context/src/tools/tool-session.ts
Normal file
|
|
@ -0,0 +1,54 @@
|
|||
import type { GitService, KloFileStorePort } from '../core/index.js';
|
||||
import type { SemanticLayerService } from '../sl/index.js';
|
||||
import type { KnowledgeWikiService } from '../wiki/index.js';
|
||||
import type { TouchedSlSourceSet } from './touched-sl-sources.js';
|
||||
|
||||
/**
 * Identifiers that can be attached to a `ToolSession` through its optional
 * `ingest` field.
 */
export interface IngestToolMetadata {
  /** Run identifier. */
  runId: string;
  /** Job identifier. */
  jobId: string;
  /** Sync identifier. */
  syncId: string;
  /** Key of the source. */
  sourceKey: string;
}
|
||||
|
||||
/**
 * One entry of `ToolSession.actions`.
 */
export interface MemoryAction {
  /** Which target the action applies to. */
  target: 'wiki' | 'sl';
  /** What happened to the keyed item. */
  type: 'created' | 'updated' | 'removed';
  /** Key of the affected item. */
  key: string;
  /** Free-text detail about the action. */
  detail: string;
  /** Optional connection identifier for the target; may be `null`. */
  targetConnectionId?: string | null;
}
|
||||
|
||||
/**
 * One entry of `ToolSession.evictionDecisions`.
 */
interface EvictionDecisionRecord {
  /** Raw path the decision refers to. */
  rawPath: string;
  /** Kind of artifact the decision is about. */
  artifactKind: 'wiki' | 'sl';
  /** Key of that artifact. */
  artifactKey: string;
  /** Decision taken for the artifact. */
  action: 'removed' | 'retained_deprecated' | 'retained_supported';
  /** Free-text justification for the decision. */
  reason: string;
}
|
||||
|
||||
/**
 * State scoped to a single WU (or a single memory agent) and carried through
 * ToolContext. If a session is present, SL/wiki tools use its session-scoped
 * services and record touched-set entries / actions rather than calling the
 * shared services. If it is absent, tools keep the behavior they have for
 * interactive research/workshop callers.
 */
export interface ToolSession {
  /**
   * Warehouse connection that SL tools operate on. It is `null` for sessions
   * without a warehouse connection (wiki-only memory-agent turns); SL tools
   * must check for that and return a structured error instead of running
   * against a blank connection.
   */
  connectionId: string | null;

  /** When true, worktree-scoped service writes bypass DB index updates. */
  isWorktreeScoped: boolean;

  preHead: string | null;

  /** SL sources touched during the session, grouped by connection. */
  touchedSlSources: TouchedSlSourceSet;

  /** Actions recorded during the session. */
  actions: MemoryAction[];

  semanticLayerService: SemanticLayerService;
  wikiService: KnowledgeWikiService;
  configService: KloFileStorePort;
  gitService: GitService;

  /** Ingest identifiers, when the session carries them. */
  ingest?: IngestToolMetadata;

  /** Eviction decisions, when the session records them. */
  evictionDecisions?: EvictionDecisionRecord[];
}
|
||||
45
packages/context/src/tools/touched-sl-sources.test.ts
Normal file
45
packages/context/src/tools/touched-sl-sources.test.ts
Normal file
|
|
@ -0,0 +1,45 @@
|
|||
import { describe, expect, it } from 'vitest';
|
||||
import {
|
||||
addTouchedSlSource,
|
||||
createTouchedSlSources,
|
||||
deleteTouchedSlSource,
|
||||
hasTouchedSlSource,
|
||||
listTouchedSlSources,
|
||||
touchedSlSourceCount,
|
||||
touchedSlSourceNamesForConnection,
|
||||
} from './touched-sl-sources.js';
|
||||
|
||||
// Covers the helpers that track touched SL sources per connection.
describe('target-aware touched SL source helpers', () => {
  it('deduplicates by connectionId and sourceName while preserving target identity', () => {
    const touched = createTouchedSlSources();

    // The same pair is added twice on purpose; the same name on another connection must stay distinct.
    const additions: Array<[string, string]> = [
      ['warehouse-a', 'orders'],
      ['warehouse-a', 'orders'],
      ['warehouse-b', 'orders'],
    ];
    for (const [connectionId, sourceName] of additions) {
      addTouchedSlSource(touched, connectionId, sourceName);
    }

    const expected = [
      { connectionId: 'warehouse-a', sourceName: 'orders' },
      { connectionId: 'warehouse-b', sourceName: 'orders' },
    ];
    expect(listTouchedSlSources(touched)).toEqual(expected);
    expect(touchedSlSourceCount(touched)).toBe(2);
    expect(hasTouchedSlSource(touched, 'warehouse-a', 'orders')).toBe(true);
    expect(hasTouchedSlSource(touched, 'warehouse-b', 'orders')).toBe(true);
  });

  it('lists touched names for one connection and deletes only that connection/source pair', () => {
    const initial = [
      { connectionId: 'warehouse-a', sourceName: 'orders' },
      { connectionId: 'warehouse-a', sourceName: 'customers' },
      { connectionId: 'warehouse-b', sourceName: 'orders' },
    ];
    const touched = createTouchedSlSources(initial);

    deleteTouchedSlSource(touched, 'warehouse-a', 'orders');

    expect(touchedSlSourceNamesForConnection(touched, 'warehouse-a')).toEqual(['customers']);
    expect(touchedSlSourceNamesForConnection(touched, 'warehouse-b')).toEqual(['orders']);

    const remaining = [
      { connectionId: 'warehouse-a', sourceName: 'customers' },
      { connectionId: 'warehouse-b', sourceName: 'orders' },
    ];
    expect(listTouchedSlSources(touched)).toEqual(remaining);
  });
});
|
||||
60
packages/context/src/tools/touched-sl-sources.ts
Normal file
60
packages/context/src/tools/touched-sl-sources.ts
Normal file
|
|
@ -0,0 +1,60 @@
|
|||
/**
 * A (connection, source name) pair identifying one touched SL source.
 */
export interface TouchedSlSource {
  /** Identifier of the connection the source belongs to. */
  connectionId: string;
  /** Name of the source within that connection. */
  sourceName: string;
}
|
||||
|
||||
/** Touched sources grouped by connection: maps a connectionId to the set of source names recorded for it. */
export type TouchedSlSourceSet = Map<string, Set<string>>;
|
||||
|
||||
/**
 * Creates a touched-source set, optionally pre-populated.
 *
 * @param entries - Pairs to record up front; duplicates collapse. Defaults to none.
 * @returns A new set containing every given pair.
 */
export function createTouchedSlSources(entries: TouchedSlSource[] = []): TouchedSlSourceSet {
  const result: TouchedSlSourceSet = new Map();
  entries.forEach(({ connectionId, sourceName }) => {
    addTouchedSlSource(result, connectionId, sourceName);
  });
  return result;
}
|
||||
|
||||
/**
 * Records `sourceName` as touched for `connectionId`, creating the connection's
 * bucket on first use. Adding an already-recorded pair changes nothing.
 *
 * @param touched - Set to mutate.
 * @param connectionId - Connection the source belongs to.
 * @param sourceName - Name of the touched source.
 */
export function addTouchedSlSource(touched: TouchedSlSourceSet, connectionId: string, sourceName: string): void {
  let names = touched.get(connectionId);
  if (names == null) {
    names = new Set<string>();
    touched.set(connectionId, names);
  }
  names.add(sourceName);
}
|
||||
|
||||
/**
 * Removes one (connection, source) pair. When that leaves the connection with
 * no sources, the connection entry itself is removed. Unknown connections are
 * ignored.
 *
 * @param touched - Set to mutate.
 * @param connectionId - Connection the source belongs to.
 * @param sourceName - Name of the source to forget.
 */
export function deleteTouchedSlSource(touched: TouchedSlSourceSet, connectionId: string, sourceName: string): void {
  const names = touched.get(connectionId);
  if (names) {
    names.delete(sourceName);
    // Drop empty buckets so a connection with nothing touched does not linger.
    if (names.size === 0) {
      touched.delete(connectionId);
    }
  }
}
|
||||
|
||||
/**
 * Reports whether a (connection, source) pair has been recorded.
 *
 * @param touched - Set to query.
 * @param connectionId - Connection to look under.
 * @param sourceName - Source name to look for.
 * @returns True only when the connection is known and contains the source.
 */
export function hasTouchedSlSource(touched: TouchedSlSourceSet, connectionId: string, sourceName: string): boolean {
  const names = touched.get(connectionId);
  return names == null ? false : names.has(sourceName);
}
|
||||
|
||||
/**
 * Flattens the set into a list of (connection, source) pairs, ordered by
 * connectionId and then by sourceName.
 *
 * Ordering uses plain code-unit comparison, not `localeCompare`, so the result
 * does not depend on the host's locale and agrees with the default `.sort()`
 * ordering used by `touchedSlSourceNamesForConnection`.
 *
 * @param touched - Set to list.
 * @returns A new, sorted array; the set itself is not modified.
 */
export function listTouchedSlSources(touched: TouchedSlSourceSet): TouchedSlSource[] {
  const compareCodeUnits = (left: string, right: string): number => {
    if (left < right) {
      return -1;
    }
    return left > right ? 1 : 0;
  };

  const out: TouchedSlSource[] = [];
  for (const [connectionId, sources] of touched) {
    for (const sourceName of sources) {
      out.push({ connectionId, sourceName });
    }
  }

  return out.sort(
    (left, right) =>
      compareCodeUnits(left.connectionId, right.connectionId) || compareCodeUnits(left.sourceName, right.sourceName),
  );
}
|
||||
|
||||
/**
 * Counts the recorded (connection, source) pairs across all connections.
 *
 * @param touched - Set to measure.
 * @returns Sum of the number of sources under every connection.
 */
export function touchedSlSourceCount(touched: TouchedSlSourceSet): number {
  return [...touched.values()].reduce((sum, names) => sum + names.size, 0);
}
|
||||
|
||||
/**
 * Lists the source names recorded for one connection, in default `.sort()` order.
 *
 * @param touched - Set to query.
 * @param connectionId - Connection whose sources are wanted.
 * @returns A new sorted array; empty when the connection is unknown.
 */
export function touchedSlSourceNamesForConnection(touched: TouchedSlSourceSet, connectionId: string): string[] {
  const names = touched.get(connectionId);
  if (names == null) {
    return [];
  }
  return Array.from(names).sort();
}
|
||||
Loading…
Add table
Add a link
Reference in a new issue