feat: add Docker entrypoints, LLM providers, pipeline hardening, workbench pages

Phase 9 — four parallel workstreams:

- Stream A: 14 Docker entrypoints for containerized deployment
- Stream B: Pipeline hardening — robust JSON parsing, LLM retry logic,
  consumer negative-ack, FalkorDB test import fix
- Stream C: Azure OpenAI, OpenAI-compatible, and Mistral LLM providers
- Stream D: Workbench Prompts, Token Cost, Knowledge Cores pages +
  Settings feature switches

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
elpresidank 2026-04-07 03:22:55 -05:00
parent 50fb311d2d
commit c7eefee607
34 changed files with 1457 additions and 112 deletions

View file

@ -93,64 +93,71 @@ export class KnowledgeExtractService extends FlowProcessor {
// --- Extract relationships ---
try {
const relPrompt = await promptClient.request({
name: "extract-relationships",
variables: { text },
});
const relPrompt = await promptClient.request(
{ name: "extract-relationships", variables: { text } },
{ timeoutMs: 10_000 },
);
if (!relPrompt.error) {
const relCompletion = await llmClient.request({
system: relPrompt.system,
prompt: relPrompt.prompt,
});
let relationships: ExtractedRelationship[] | null = null;
for (let attempt = 0; attempt < 3; attempt++) {
const relCompletion = await llmClient.request(
{ system: relPrompt.system, prompt: relPrompt.prompt },
{ timeoutMs: 120_000 },
);
if (!relCompletion.error && relCompletion.response) {
const relationships = parseJsonResponse<ExtractedRelationship[]>(relCompletion.response);
if (relationships) {
for (const rel of relationships) {
if (!rel.subject || !rel.predicate || !rel.object) continue;
const subjectIri = toEntityIri(rel.subject);
const predicateIri = toEntityIri(rel.predicate);
const objectIri = toEntityIri(rel.object);
// Main relationship triple
allTriples.push({ s: subjectIri, p: predicateIri, o: objectIri });
// rdfs:label triples for each entity
allTriples.push({
s: subjectIri,
p: iriTerm(RDFS_LABEL),
o: literalTerm(rel.subject),
});
allTriples.push({
s: predicateIri,
p: iriTerm(RDFS_LABEL),
o: literalTerm(rel.predicate),
});
allTriples.push({
s: objectIri,
p: iriTerm(RDFS_LABEL),
o: literalTerm(rel.object),
});
// Entity contexts for subject and object
allEntityContexts.push({
entity: subjectIri,
context: text,
chunkId: msg.documentId,
});
allEntityContexts.push({
entity: objectIri,
context: text,
chunkId: msg.documentId,
});
}
console.log(`[KnowledgeExtract] Extracted ${relationships.length} relationships`);
if (!relCompletion.error && relCompletion.response) {
relationships = parseJsonResponse<ExtractedRelationship[]>(relCompletion.response);
if (relationships) break;
console.warn(`[KnowledgeExtract] Relationship parse failed, attempt ${attempt + 1}/3`);
} else {
break; // LLM error, don't retry
}
}
if (relationships) {
for (const rel of relationships) {
if (!rel.subject || !rel.predicate || !rel.object) continue;
const subjectIri = toEntityIri(rel.subject);
const predicateIri = toEntityIri(rel.predicate);
const objectIri = toEntityIri(rel.object);
// Main relationship triple
allTriples.push({ s: subjectIri, p: predicateIri, o: objectIri });
// rdfs:label triples for each entity
allTriples.push({
s: subjectIri,
p: iriTerm(RDFS_LABEL),
o: literalTerm(rel.subject),
});
allTriples.push({
s: predicateIri,
p: iriTerm(RDFS_LABEL),
o: literalTerm(rel.predicate),
});
allTriples.push({
s: objectIri,
p: iriTerm(RDFS_LABEL),
o: literalTerm(rel.object),
});
// Entity contexts for subject and object
allEntityContexts.push({
entity: subjectIri,
context: text,
chunkId: msg.documentId,
});
allEntityContexts.push({
entity: objectIri,
context: text,
chunkId: msg.documentId,
});
}
console.log(`[KnowledgeExtract] Extracted ${relationships.length} relationships`);
}
}
} catch (err) {
console.error("[KnowledgeExtract] Relationship extraction failed:", err);
@ -158,51 +165,58 @@ export class KnowledgeExtractService extends FlowProcessor {
// --- Extract definitions ---
try {
const defPrompt = await promptClient.request({
name: "extract-definitions",
variables: { text },
});
const defPrompt = await promptClient.request(
{ name: "extract-definitions", variables: { text } },
{ timeoutMs: 10_000 },
);
if (!defPrompt.error) {
const defCompletion = await llmClient.request({
system: defPrompt.system,
prompt: defPrompt.prompt,
});
let definitions: ExtractedDefinition[] | null = null;
for (let attempt = 0; attempt < 3; attempt++) {
const defCompletion = await llmClient.request(
{ system: defPrompt.system, prompt: defPrompt.prompt },
{ timeoutMs: 120_000 },
);
if (!defCompletion.error && defCompletion.response) {
const definitions = parseJsonResponse<ExtractedDefinition[]>(defCompletion.response);
if (definitions) {
for (const def of definitions) {
if (!def.entity || !def.definition) continue;
const entityIri = toEntityIri(def.entity);
// Definition triple
allTriples.push({
s: entityIri,
p: iriTerm(SKOS_DEFINITION),
o: literalTerm(def.definition),
});
// Label triple
allTriples.push({
s: entityIri,
p: iriTerm(RDFS_LABEL),
o: literalTerm(def.entity),
});
// Entity context
allEntityContexts.push({
entity: entityIri,
context: text,
chunkId: msg.documentId,
});
}
console.log(`[KnowledgeExtract] Extracted ${definitions.length} definitions`);
if (!defCompletion.error && defCompletion.response) {
definitions = parseJsonResponse<ExtractedDefinition[]>(defCompletion.response);
if (definitions) break;
console.warn(`[KnowledgeExtract] Definition parse failed, attempt ${attempt + 1}/3`);
} else {
break; // LLM error, don't retry
}
}
if (definitions) {
for (const def of definitions) {
if (!def.entity || !def.definition) continue;
const entityIri = toEntityIri(def.entity);
// Definition triple
allTriples.push({
s: entityIri,
p: iriTerm(SKOS_DEFINITION),
o: literalTerm(def.definition),
});
// Label triple
allTriples.push({
s: entityIri,
p: iriTerm(RDFS_LABEL),
o: literalTerm(def.entity),
});
// Entity context
allEntityContexts.push({
entity: entityIri,
context: text,
chunkId: msg.documentId,
});
}
console.log(`[KnowledgeExtract] Extracted ${definitions.length} definitions`);
}
}
} catch (err) {
console.error("[KnowledgeExtract] Definition extraction failed:", err);
@ -245,23 +259,49 @@ function literalTerm(value: string): Term {
/**
 * Parse JSON from LLM output, tolerating markdown code fences, surrounding prose
 * and truncated output.
 *
 * Progressive fallback, first success wins:
 *  1. Direct parse of the trimmed text (after stripping a fence that wraps the whole text).
 *  2. Parse the span from the first `[` to the last `]`.
 *  3. Truncated-array repair: starting at the first `[`, cut after a `}` and append `]`,
 *     trying each `}` from the end of the text backwards.
 *  4. Parse the first balanced `{ ... }` object and wrap it in a one-element array.
 *
 * @param raw Raw text returned by the LLM.
 * @returns The parsed value cast to `T` (unvalidated — callers must check the fields
 *          they rely on), or `null` when every strategy fails or the text is the JSON
 *          literal `null`.
 */
function parseJsonResponse<T>(raw: string): T | null {
  // JSON.parse never produces `undefined`, so it is a safe "failed" sentinel.
  const tryParse = (candidate: string): unknown => {
    try {
      return JSON.parse(candidate) as unknown;
    } catch {
      return undefined;
    }
  };

  // Returns the first brace-balanced `{...}` span, ignoring braces inside JSON strings.
  const firstBalancedObject = (text: string): string | null => {
    const start = text.indexOf("{");
    if (start < 0) return null;
    let depth = 0;
    let inString = false;
    for (let i = start; i < text.length; i++) {
      const ch = text[i];
      if (inString) {
        if (ch === "\\") {
          i++; // skip the escaped character
        } else if (ch === '"') {
          inString = false;
        }
      } else if (ch === '"') {
        inString = true;
      } else if (ch === "{") {
        depth++;
      } else if (ch === "}" && --depth === 0) {
        return text.slice(start, i + 1);
      }
    }
    return null;
  };

  // Attempt 1: direct parse after stripping fences (```json ... ``` or ``` ... ```)
  let cleaned = raw.trim();
  const fenced = cleaned.match(/^```(?:json)?\s*\n?([\s\S]*?)\n?```$/)?.[1];
  if (fenced !== undefined) {
    cleaned = fenced.trim();
  }
  const direct = tryParse(cleaned);
  if (direct !== undefined) return direct as T;

  const arrayStart = cleaned.indexOf("[");
  if (arrayStart >= 0) {
    // Attempt 2: extract the JSON array embedded in surrounding text
    const arrayEnd = cleaned.lastIndexOf("]");
    if (arrayEnd > arrayStart) {
      const extracted = tryParse(cleaned.slice(arrayStart, arrayEnd + 1));
      if (extracted !== undefined) return extracted as T;
    }

    // Attempt 3: repair a truncated array by closing it after the last complete object.
    // This must not depend on a `]` being present: a truncated response usually has none.
    // Walking backwards over every `}` keeps as many complete elements as possible.
    const tail = cleaned.slice(arrayStart);
    for (
      let brace = tail.lastIndexOf("}");
      brace > 0;
      brace = tail.lastIndexOf("}", brace - 1)
    ) {
      const repaired = tryParse(tail.slice(0, brace + 1) + "]");
      if (repaired !== undefined) return repaired as T;
    }
  }

  // Attempt 4: extract the first JSON object and wrap it in an array.
  // The cast assumes callers request an array type, as both extraction paths do.
  const objectText = firstBalancedObject(cleaned);
  if (objectText !== null) {
    const obj = tryParse(objectText);
    if (obj !== undefined) return [obj] as unknown as T;
  }

  console.warn("[KnowledgeExtract] Failed to parse JSON from LLM response:", raw.slice(0, 300));
  return null;
}
export async function run(): Promise<void> {

View file

@ -79,3 +79,12 @@ export { DocumentRagService } from "./retrieval/document-rag-service.js";
// Flow manager service
export { FlowManagerService } from "./flow-manager/service.js";
// Azure OpenAI text completion
export { AzureOpenAIProcessor } from "./model/text-completion/azure-openai.js";
// OpenAI-compatible text completion
export { OpenAICompatibleProcessor } from "./model/text-completion/openai-compatible.js";
// Mistral text completion
export { MistralProcessor } from "./model/text-completion/mistral.js";

View file

@ -0,0 +1,156 @@
/**
* Azure OpenAI text completion service.
*
* Env:
* AZURE_TOKEN (required Azure OpenAI API key)
* AZURE_ENDPOINT (required e.g. https://my-resource.openai.azure.com)
* AZURE_MODEL (default: gpt-4o)
* AZURE_API_VERSION (default: 2024-12-01-preview)
*/
import { AzureOpenAI } from "openai";
import {
LlmService,
type ProcessorConfig,
type LlmResult,
type LlmChunk,
TooManyRequestsError,
} from "@trustgraph/base";
/**
 * Text-completion processor backed by the Azure OpenAI chat-completions API.
 *
 * Settings are resolved from the constructor config first, then the environment:
 * model (`AZURE_MODEL`, default "gpt-4o"), API key (`AZURE_TOKEN`, required),
 * endpoint (`AZURE_ENDPOINT`, required) and API version (`AZURE_API_VERSION`,
 * default "2024-12-01-preview"). Temperature defaults to 0.0 and the output cap
 * to 4096 tokens.
 */
export class AzureOpenAIProcessor extends LlmService {
  private client: AzureOpenAI;
  private readonly defaultModel: string;
  private readonly defaultTemperature: number;
  private readonly maxOutput: number;

  /**
   * @throws Error when no API key or no endpoint is available from config or environment.
   */
  constructor(
    config: ProcessorConfig & {
      model?: string;
      apiKey?: string;
      endpoint?: string;
      apiVersion?: string;
      temperature?: number;
      maxOutput?: number;
    },
  ) {
    super(config);
    this.defaultModel = config.model ?? process.env.AZURE_MODEL ?? "gpt-4o";
    this.defaultTemperature = config.temperature ?? 0.0;
    this.maxOutput = config.maxOutput ?? 4096;

    const apiKey = config.apiKey ?? process.env.AZURE_TOKEN;
    if (!apiKey) throw new Error("Azure OpenAI API key not specified");

    const endpoint = config.endpoint ?? process.env.AZURE_ENDPOINT;
    if (!endpoint) throw new Error("Azure OpenAI endpoint not specified");

    const apiVersion =
      config.apiVersion ??
      process.env.AZURE_API_VERSION ??
      "2024-12-01-preview";

    this.client = new AzureOpenAI({ apiKey, apiVersion, endpoint });
    console.log("[AzureOpenAI] LLM service initialized");
  }

  /** True when `err` is an object carrying HTTP status 429. */
  private static isRateLimitError(err: unknown): boolean {
    return (
      typeof err === "object" &&
      err !== null &&
      (err as { status?: unknown }).status === 429
    );
  }

  /**
   * Run one non-streaming chat completion.
   *
   * @param system System message content.
   * @param prompt User message content.
   * @param model Overrides the default model when provided.
   * @param temperature Overrides the default temperature when provided.
   * @returns Completion text plus prompt/completion token counts (0 when usage is absent).
   * @throws TooManyRequestsError when the request fails with status 429; other errors are rethrown.
   */
  async generateContent(
    system: string,
    prompt: string,
    model?: string,
    temperature?: number,
  ): Promise<LlmResult> {
    const modelName = model ?? this.defaultModel;
    const temp = temperature ?? this.defaultTemperature;
    try {
      const resp = await this.client.chat.completions.create({
        model: modelName,
        messages: [
          { role: "system", content: system },
          { role: "user", content: prompt },
        ],
        temperature: temp,
        max_completion_tokens: this.maxOutput,
      });
      return {
        // Guarded like the streaming path: an empty `choices` array yields an
        // empty completion instead of a TypeError.
        text: resp.choices[0]?.message?.content ?? "",
        inToken: resp.usage?.prompt_tokens ?? 0,
        outToken: resp.usage?.completion_tokens ?? 0,
        model: modelName,
      };
    } catch (err: unknown) {
      if (AzureOpenAIProcessor.isRateLimitError(err)) {
        throw new TooManyRequestsError();
      }
      throw err;
    }
  }

  override supportsStreaming(): boolean {
    return true;
  }

  /**
   * Run one streaming chat completion.
   *
   * Yields a non-final chunk for every non-empty content delta (token counts `null`),
   * then one final chunk with empty text carrying the last usage figures seen on the
   * stream (0 if none arrived).
   *
   * @throws TooManyRequestsError when the request fails with status 429; other errors are rethrown.
   */
  async *generateContentStream(
    system: string,
    prompt: string,
    model?: string,
    temperature?: number,
  ): AsyncGenerator<LlmChunk> {
    const modelName = model ?? this.defaultModel;
    const temp = temperature ?? this.defaultTemperature;
    try {
      const stream = await this.client.chat.completions.create({
        model: modelName,
        messages: [
          { role: "system", content: system },
          { role: "user", content: prompt },
        ],
        temperature: temp,
        max_completion_tokens: this.maxOutput,
        stream: true,
        // Ask the service to report token usage on the stream.
        stream_options: { include_usage: true },
      });

      let totalInputTokens = 0;
      let totalOutputTokens = 0;
      for await (const chunk of stream) {
        const piece = chunk.choices?.[0]?.delta?.content;
        if (piece) {
          yield {
            text: piece,
            inToken: null,
            outToken: null,
            model: modelName,
            isFinal: false,
          };
        }
        if (chunk.usage) {
          totalInputTokens = chunk.usage.prompt_tokens;
          totalOutputTokens = chunk.usage.completion_tokens;
        }
      }

      yield {
        text: "",
        inToken: totalInputTokens,
        outToken: totalOutputTokens,
        model: modelName,
        isFinal: true,
      };
    } catch (err: unknown) {
      if (AzureOpenAIProcessor.isRateLimitError(err)) {
        throw new TooManyRequestsError();
      }
      throw err;
    }
  }
}
/**
 * Module entry point: awaits the static `launch` that AzureOpenAIProcessor
 * inherits, passing "text-completion".
 * NOTE(review): presumably the argument is the service identifier — confirm against LlmService.
 */
export async function run(): Promise<void> {
  const serviceId = "text-completion";
  await AzureOpenAIProcessor.launch(serviceId);
}

View file

@ -0,0 +1,144 @@
/**
* Mistral text completion service.
*
* Env:
* MISTRAL_TOKEN (required Mistral API key)
* MISTRAL_MODEL (default: ministral-8b-latest)
*/
import { Mistral } from "@mistralai/mistralai";
import {
LlmService,
type ProcessorConfig,
type LlmResult,
type LlmChunk,
TooManyRequestsError,
} from "@trustgraph/base";
/**
 * Text-completion processor backed by the Mistral chat API.
 *
 * Settings are resolved from the constructor config first, then the environment:
 * model (`MISTRAL_MODEL`, default "ministral-8b-latest") and API key
 * (`MISTRAL_TOKEN`, required). Temperature defaults to 0.0 and the output cap
 * to 4096 tokens.
 */
export class MistralProcessor extends LlmService {
  private client: Mistral;
  private readonly defaultModel: string;
  private readonly defaultTemperature: number;
  private readonly maxOutput: number;

  /**
   * @throws Error when no API key is available from config or environment.
   */
  constructor(
    config: ProcessorConfig & {
      model?: string;
      apiKey?: string;
      temperature?: number;
      maxOutput?: number;
    },
  ) {
    super(config);
    this.defaultModel =
      config.model ?? process.env.MISTRAL_MODEL ?? "ministral-8b-latest";
    this.defaultTemperature = config.temperature ?? 0.0;
    this.maxOutput = config.maxOutput ?? 4096;

    const apiKey = config.apiKey ?? process.env.MISTRAL_TOKEN;
    if (!apiKey) throw new Error("Mistral API key not specified");

    this.client = new Mistral({ apiKey });
    console.log("[Mistral] LLM service initialized");
  }

  /** True when `err` is an object whose `statusCode` or `status` is 429. */
  private static isRateLimitError(err: unknown): boolean {
    if (typeof err !== "object" || err === null) return false;
    const { statusCode, status } = err as {
      statusCode?: unknown;
      status?: unknown;
    };
    return statusCode === 429 || status === 429;
  }

  /**
   * Normalise message content to plain text.
   *
   * Content may be a string or an array of content chunks; asserting it to
   * `string` would let an array leak out as "text". Strings pass through,
   * arrays contribute the `text` of every chunk that has one, anything else
   * (null/undefined) becomes "".
   */
  private static contentToText(content: unknown): string {
    if (typeof content === "string") return content;
    if (!Array.isArray(content)) return "";
    return content
      .map((part: unknown) =>
        typeof part === "object" &&
        part !== null &&
        typeof (part as { text?: unknown }).text === "string"
          ? (part as { text: string }).text
          : "",
      )
      .join("");
  }

  /**
   * Run one non-streaming chat completion.
   *
   * @param system System message content.
   * @param prompt User message content.
   * @param model Overrides the default model when provided.
   * @param temperature Overrides the default temperature when provided.
   * @returns Completion text plus prompt/completion token counts (0 when usage is absent).
   * @throws TooManyRequestsError when the request fails with status 429; other errors are rethrown.
   */
  async generateContent(
    system: string,
    prompt: string,
    model?: string,
    temperature?: number,
  ): Promise<LlmResult> {
    const modelName = model ?? this.defaultModel;
    const temp = temperature ?? this.defaultTemperature;
    try {
      const resp = await this.client.chat.complete({
        model: modelName,
        messages: [
          { role: "system", content: system },
          { role: "user", content: prompt },
        ],
        temperature: temp,
        maxTokens: this.maxOutput,
      });
      return {
        text: MistralProcessor.contentToText(
          resp.choices?.[0]?.message?.content,
        ),
        inToken: resp.usage?.promptTokens ?? 0,
        outToken: resp.usage?.completionTokens ?? 0,
        model: modelName,
      };
    } catch (err: unknown) {
      if (MistralProcessor.isRateLimitError(err)) {
        throw new TooManyRequestsError();
      }
      throw err;
    }
  }

  override supportsStreaming(): boolean {
    return true;
  }

  /**
   * Run one streaming chat completion.
   *
   * Yields a non-final chunk for every delta that carries non-empty text (token
   * counts `null`), then one final chunk with empty text carrying the last usage
   * figures seen on the stream (0 if none arrived).
   *
   * @throws TooManyRequestsError when the request fails with status 429; other errors are rethrown.
   */
  async *generateContentStream(
    system: string,
    prompt: string,
    model?: string,
    temperature?: number,
  ): AsyncGenerator<LlmChunk> {
    const modelName = model ?? this.defaultModel;
    const temp = temperature ?? this.defaultTemperature;
    try {
      const stream = await this.client.chat.stream({
        model: modelName,
        messages: [
          { role: "system", content: system },
          { role: "user", content: prompt },
        ],
        temperature: temp,
        maxTokens: this.maxOutput,
      });

      let totalInputTokens = 0;
      let totalOutputTokens = 0;
      for await (const chunk of stream) {
        const piece = MistralProcessor.contentToText(
          chunk.data?.choices?.[0]?.delta?.content,
        );
        if (piece) {
          yield {
            text: piece,
            inToken: null,
            outToken: null,
            model: modelName,
            isFinal: false,
          };
        }
        if (chunk.data?.usage) {
          totalInputTokens = chunk.data.usage.promptTokens ?? 0;
          totalOutputTokens = chunk.data.usage.completionTokens ?? 0;
        }
      }

      yield {
        text: "",
        inToken: totalInputTokens,
        outToken: totalOutputTokens,
        model: modelName,
        isFinal: true,
      };
    } catch (err: unknown) {
      if (MistralProcessor.isRateLimitError(err)) {
        throw new TooManyRequestsError();
      }
      throw err;
    }
  }
}
/**
 * Module entry point: awaits the static `launch` that MistralProcessor
 * inherits, passing "text-completion".
 * NOTE(review): presumably the argument is the service identifier — confirm against LlmService.
 */
export async function run(): Promise<void> {
  const serviceId = "text-completion";
  await MistralProcessor.launch(serviceId);
}

View file

@ -0,0 +1,139 @@
/**
* OpenAI-compatible text completion service (generic local server).
*
* Works with LM Studio, llama.cpp, vLLM, Ollama OpenAI-compat endpoint, etc.
*
* Env:
* OPENAI_COMPAT_URL (required e.g. http://localhost:1234/v1)
* OPENAI_COMPAT_KEY (default: sk-no-key-required)
* OPENAI_COMPAT_MODEL (default: default)
*/
import OpenAI from "openai";
import {
  LlmService,
  type ProcessorConfig,
  type LlmResult,
  type LlmChunk,
  TooManyRequestsError,
} from "@trustgraph/base";
/**
 * Text-completion processor for any server exposing an OpenAI-compatible
 * chat-completions endpoint.
 *
 * Settings are resolved from the constructor config first, then the environment:
 * model (`OPENAI_COMPAT_MODEL`, default "default"), base URL (`OPENAI_COMPAT_URL`,
 * required) and API key (`OPENAI_COMPAT_KEY`, default "sk-no-key-required").
 * Temperature defaults to 0.0 and the output cap to 4096 tokens.
 */
export class OpenAICompatibleProcessor extends LlmService {
  private client: OpenAI;
  private readonly defaultModel: string;
  private readonly defaultTemperature: number;
  private readonly maxOutput: number;

  /**
   * @throws Error when no server URL is available from config or environment.
   */
  constructor(
    config: ProcessorConfig & {
      model?: string;
      apiKey?: string;
      baseUrl?: string;
      temperature?: number;
      maxOutput?: number;
    },
  ) {
    super(config);
    this.defaultModel =
      config.model ?? process.env.OPENAI_COMPAT_MODEL ?? "default";
    this.defaultTemperature = config.temperature ?? 0.0;
    this.maxOutput = config.maxOutput ?? 4096;

    const baseURL = config.baseUrl ?? process.env.OPENAI_COMPAT_URL;
    if (!baseURL)
      throw new Error(
        "OpenAI-compatible server URL not specified (set OPENAI_COMPAT_URL)",
      );

    const apiKey =
      config.apiKey ?? process.env.OPENAI_COMPAT_KEY ?? "sk-no-key-required";

    this.client = new OpenAI({ baseURL, apiKey });
    console.log("[OpenAI-Compatible] LLM service initialized");
  }

  /** True when `err` is an object carrying HTTP status 429. */
  private static isRateLimitError(err: unknown): boolean {
    return (
      typeof err === "object" &&
      err !== null &&
      (err as { status?: unknown }).status === 429
    );
  }

  /**
   * Run one non-streaming chat completion.
   *
   * @param system System message content.
   * @param prompt User message content.
   * @param model Overrides the default model when provided.
   * @param temperature Overrides the default temperature when provided.
   * @returns Completion text plus prompt/completion token counts (0 when usage is absent).
   * @throws TooManyRequestsError when the request fails with status 429, matching
   *         the Azure OpenAI and Mistral processors; other errors are rethrown.
   */
  async generateContent(
    system: string,
    prompt: string,
    model?: string,
    temperature?: number,
  ): Promise<LlmResult> {
    const modelName = model ?? this.defaultModel;
    const temp = temperature ?? this.defaultTemperature;
    try {
      const resp = await this.client.chat.completions.create({
        model: modelName,
        messages: [
          { role: "system", content: system },
          { role: "user", content: prompt },
        ],
        temperature: temp,
        max_tokens: this.maxOutput,
      });
      return {
        // Guarded like the streaming path: a response without choices yields
        // an empty completion instead of a TypeError.
        text: resp.choices?.[0]?.message?.content ?? "",
        inToken: resp.usage?.prompt_tokens ?? 0,
        outToken: resp.usage?.completion_tokens ?? 0,
        model: modelName,
      };
    } catch (err: unknown) {
      if (OpenAICompatibleProcessor.isRateLimitError(err)) {
        throw new TooManyRequestsError();
      }
      throw err;
    }
  }

  override supportsStreaming(): boolean {
    return true;
  }

  /**
   * Run one streaming chat completion.
   *
   * Yields a non-final chunk for every non-empty content delta (token counts `null`),
   * then one final chunk with empty text carrying the last usage figures seen on the
   * stream. No usage reporting is requested here, so the counts stay 0 unless the
   * server includes `usage` on a chunk of its own accord.
   *
   * @throws TooManyRequestsError when the request fails with status 429; other errors are rethrown.
   */
  async *generateContentStream(
    system: string,
    prompt: string,
    model?: string,
    temperature?: number,
  ): AsyncGenerator<LlmChunk> {
    const modelName = model ?? this.defaultModel;
    const temp = temperature ?? this.defaultTemperature;
    try {
      const stream = await this.client.chat.completions.create({
        model: modelName,
        messages: [
          { role: "system", content: system },
          { role: "user", content: prompt },
        ],
        temperature: temp,
        max_tokens: this.maxOutput,
        stream: true,
      });

      let totalInputTokens = 0;
      let totalOutputTokens = 0;
      for await (const chunk of stream) {
        const piece = chunk.choices?.[0]?.delta?.content;
        if (piece) {
          yield {
            text: piece,
            inToken: null,
            outToken: null,
            model: modelName,
            isFinal: false,
          };
        }
        if (chunk.usage) {
          totalInputTokens = chunk.usage.prompt_tokens;
          totalOutputTokens = chunk.usage.completion_tokens;
        }
      }

      yield {
        text: "",
        inToken: totalInputTokens,
        outToken: totalOutputTokens,
        model: modelName,
        isFinal: true,
      };
    } catch (err: unknown) {
      if (OpenAICompatibleProcessor.isRateLimitError(err)) {
        throw new TooManyRequestsError();
      }
      throw err;
    }
  }
}
/**
 * Module entry point: awaits the static `launch` that OpenAICompatibleProcessor
 * inherits, passing "text-completion".
 * NOTE(review): presumably the argument is the service identifier — confirm against LlmService.
 */
export async function run(): Promise<void> {
  const serviceId = "text-completion";
  await OpenAICompatibleProcessor.launch(serviceId);
}