feat: add Docker entrypoints, LLM providers, pipeline hardening, workbench pages

Phase 9 — four parallel workstreams:
- Stream A: 14 Docker entrypoints for containerized deployment
- Stream B: Pipeline hardening — robust JSON parsing, LLM retry logic, consumer negative-ack, FalkorDB test import fix
- Stream C: Azure OpenAI, OpenAI-compatible, and Mistral LLM providers
- Stream D: Workbench Prompts, Token Cost, Knowledge Cores pages + Settings feature switches

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
2026-07-01 09:29:38 +02:00 · 2026-04-07 03:22:55 -05:00 · 2026-04-07 03:22:55 -05:00 · c7eefee607
commit c7eefee607
parent 50fb311d2d
34 changed files with 1457 additions and 112 deletions
--- a/ts/packages/flow/src/extract/knowledge-extract.ts
+++ b/ts/packages/flow/src/extract/knowledge-extract.ts
@ -93,64 +93,71 @@ export class KnowledgeExtractService extends FlowProcessor {

    // --- Extract relationships ---
    try {
-      const relPrompt = await promptClient.request({
-        name: "extract-relationships",
-        variables: { text },
-      });
+      const relPrompt = await promptClient.request(
+        { name: "extract-relationships", variables: { text } },
+        { timeoutMs: 10_000 },
+      );

      if (!relPrompt.error) {
-        const relCompletion = await llmClient.request({
-          system: relPrompt.system,
-          prompt: relPrompt.prompt,
-        });
+        let relationships: ExtractedRelationship[] | null = null;
+        for (let attempt = 0; attempt < 3; attempt++) {
+          const relCompletion = await llmClient.request(
+            { system: relPrompt.system, prompt: relPrompt.prompt },
+            { timeoutMs: 120_000 },
+          );

-        if (!relCompletion.error && relCompletion.response) {
-          const relationships = parseJsonResponse<ExtractedRelationship[]>(relCompletion.response);
-
-          if (relationships) {
-            for (const rel of relationships) {
-              if (!rel.subject || !rel.predicate || !rel.object) continue;
-
-              const subjectIri = toEntityIri(rel.subject);
-              const predicateIri = toEntityIri(rel.predicate);
-              const objectIri = toEntityIri(rel.object);
-
-              // Main relationship triple
-              allTriples.push({ s: subjectIri, p: predicateIri, o: objectIri });
-
-              // rdfs:label triples for each entity
-              allTriples.push({
-                s: subjectIri,
-                p: iriTerm(RDFS_LABEL),
-                o: literalTerm(rel.subject),
-              });
-              allTriples.push({
-                s: predicateIri,
-                p: iriTerm(RDFS_LABEL),
-                o: literalTerm(rel.predicate),
-              });
-              allTriples.push({
-                s: objectIri,
-                p: iriTerm(RDFS_LABEL),
-                o: literalTerm(rel.object),
-              });
-
-              // Entity contexts for subject and object
-              allEntityContexts.push({
-                entity: subjectIri,
-                context: text,
-                chunkId: msg.documentId,
-              });
-              allEntityContexts.push({
-                entity: objectIri,
-                context: text,
-                chunkId: msg.documentId,
-              });
-            }
-
-            console.log(`[KnowledgeExtract] Extracted ${relationships.length} relationships`);
+          if (!relCompletion.error && relCompletion.response) {
+            relationships = parseJsonResponse<ExtractedRelationship[]>(relCompletion.response);
+            if (relationships) break;
+            console.warn(`[KnowledgeExtract] Relationship parse failed, attempt ${attempt + 1}/3`);
+          } else {
+            break; // LLM error, don't retry
          }
        }
+
+        if (relationships) {
+          for (const rel of relationships) {
+            if (!rel.subject || !rel.predicate || !rel.object) continue;
+
+            const subjectIri = toEntityIri(rel.subject);
+            const predicateIri = toEntityIri(rel.predicate);
+            const objectIri = toEntityIri(rel.object);
+
+            // Main relationship triple
+            allTriples.push({ s: subjectIri, p: predicateIri, o: objectIri });
+
+            // rdfs:label triples for each entity
+            allTriples.push({
+              s: subjectIri,
+              p: iriTerm(RDFS_LABEL),
+              o: literalTerm(rel.subject),
+            });
+            allTriples.push({
+              s: predicateIri,
+              p: iriTerm(RDFS_LABEL),
+              o: literalTerm(rel.predicate),
+            });
+            allTriples.push({
+              s: objectIri,
+              p: iriTerm(RDFS_LABEL),
+              o: literalTerm(rel.object),
+            });
+
+            // Entity contexts for subject and object
+            allEntityContexts.push({
+              entity: subjectIri,
+              context: text,
+              chunkId: msg.documentId,
+            });
+            allEntityContexts.push({
+              entity: objectIri,
+              context: text,
+              chunkId: msg.documentId,
+            });
+          }
+
+          console.log(`[KnowledgeExtract] Extracted ${relationships.length} relationships`);
+        }
      }
    } catch (err) {
      console.error("[KnowledgeExtract] Relationship extraction failed:", err);
@ -158,51 +165,58 @@ export class KnowledgeExtractService extends FlowProcessor {

    // --- Extract definitions ---
    try {
-      const defPrompt = await promptClient.request({
-        name: "extract-definitions",
-        variables: { text },
-      });
+      const defPrompt = await promptClient.request(
+        { name: "extract-definitions", variables: { text } },
+        { timeoutMs: 10_000 },
+      );

      if (!defPrompt.error) {
-        const defCompletion = await llmClient.request({
-          system: defPrompt.system,
-          prompt: defPrompt.prompt,
-        });
+        let definitions: ExtractedDefinition[] | null = null;
+        for (let attempt = 0; attempt < 3; attempt++) {
+          const defCompletion = await llmClient.request(
+            { system: defPrompt.system, prompt: defPrompt.prompt },
+            { timeoutMs: 120_000 },
+          );

-        if (!defCompletion.error && defCompletion.response) {
-          const definitions = parseJsonResponse<ExtractedDefinition[]>(defCompletion.response);
-
-          if (definitions) {
-            for (const def of definitions) {
-              if (!def.entity || !def.definition) continue;
-
-              const entityIri = toEntityIri(def.entity);
-
-              // Definition triple
-              allTriples.push({
-                s: entityIri,
-                p: iriTerm(SKOS_DEFINITION),
-                o: literalTerm(def.definition),
-              });
-
-              // Label triple
-              allTriples.push({
-                s: entityIri,
-                p: iriTerm(RDFS_LABEL),
-                o: literalTerm(def.entity),
-              });
-
-              // Entity context
-              allEntityContexts.push({
-                entity: entityIri,
-                context: text,
-                chunkId: msg.documentId,
-              });
-            }
-
-            console.log(`[KnowledgeExtract] Extracted ${definitions.length} definitions`);
+          if (!defCompletion.error && defCompletion.response) {
+            definitions = parseJsonResponse<ExtractedDefinition[]>(defCompletion.response);
+            if (definitions) break;
+            console.warn(`[KnowledgeExtract] Definition parse failed, attempt ${attempt + 1}/3`);
+          } else {
+            break; // LLM error, don't retry
          }
        }
+
+        if (definitions) {
+          for (const def of definitions) {
+            if (!def.entity || !def.definition) continue;
+
+            const entityIri = toEntityIri(def.entity);
+
+            // Definition triple
+            allTriples.push({
+              s: entityIri,
+              p: iriTerm(SKOS_DEFINITION),
+              o: literalTerm(def.definition),
+            });
+
+            // Label triple
+            allTriples.push({
+              s: entityIri,
+              p: iriTerm(RDFS_LABEL),
+              o: literalTerm(def.entity),
+            });
+
+            // Entity context
+            allEntityContexts.push({
+              entity: entityIri,
+              context: text,
+              chunkId: msg.documentId,
+            });
+          }
+
+          console.log(`[KnowledgeExtract] Extracted ${definitions.length} definitions`);
+        }
      }
    } catch (err) {
      console.error("[KnowledgeExtract] Definition extraction failed:", err);
@ -245,23 +259,49 @@ function literalTerm(value: string): Term {

 /**
 * Parse JSON from LLM output, handling markdown code fences and malformed output.
+ * Uses progressive fallback: direct parse, array extraction, truncated array repair, single object wrap.
+ *
+ * @param raw - Raw LLM response text.
+ * @returns The parsed value, or null when every fallback fails (a warning is logged).
+ *          The repair and wrap fallbacks always produce an array, so they are only
+ *          meaningful when T is an array type.
 */
 function parseJsonResponse<T>(raw: string): T | null {
-  try {
-    // Strip markdown code fences
-    let cleaned = raw.trim();
-
-    // Remove ```json ... ``` or ``` ... ```
-    const fenceMatch = cleaned.match(/^```(?:json)?\s*\n?([\s\S]*?)\n?```$/);
-    if (fenceMatch) {
-      cleaned = fenceMatch[1].trim();
-    }
-
-    return JSON.parse(cleaned) as T;
-  } catch {
-    console.warn("[KnowledgeExtract] Failed to parse JSON from LLM response:", raw.slice(0, 200));
-    return null;
+  // Attempt 1: direct parse after stripping fences
+  let cleaned = raw.trim();
+  const fenceMatch = cleaned.match(/^```(?:json)?\s*\n?([\s\S]*?)\n?```$/);
+  if (fenceMatch) {
+    cleaned = fenceMatch[1].trim();
  }
+
+  try {
+    return JSON.parse(cleaned) as T;
+  } catch { /* fall through */ }
+
+  // Attempts 2 and 3 both anchor on the first '[' so that a truncated array
+  // (one with no closing ']' at all) still reaches the repair step.
+  const arrayStart = cleaned.indexOf('[');
+  if (arrayStart >= 0) {
+    // Attempt 2: parse the span from the first '[' to the last ']'
+    const arrayEnd = cleaned.lastIndexOf(']');
+    if (arrayEnd > arrayStart) {
+      try {
+        return JSON.parse(cleaned.slice(arrayStart, arrayEnd + 1)) as T;
+      } catch { /* fall through */ }
+    }
+
+    // Attempt 3: try to fix a truncated array by closing it after the last
+    // complete object. Walk backwards over '}' positions so that a cut inside
+    // a nested object falls back to the previous object boundary.
+    const partial = cleaned.slice(arrayStart);
+    for (
+      let brace = partial.lastIndexOf('}');
+      brace > 0;
+      brace = partial.lastIndexOf('}', brace - 1)
+    ) {
+      try {
+        return JSON.parse(partial.slice(0, brace + 1) + ']') as T;
+      } catch { /* try the previous object boundary */ }
+    }
+  }
+
+  // Attempt 4: extract first JSON object, wrap in array
+  const objMatch = cleaned.match(/\{[\s\S]*?\}/);
+  if (objMatch) {
+    try {
+      const obj = JSON.parse(objMatch[0]);
+      return [obj] as unknown as T;
+    } catch { /* fall through */ }
+  }
+
+  console.warn("[KnowledgeExtract] Failed to parse JSON from LLM response:", raw.slice(0, 300));
+  return null;
 }

 export async function run(): Promise<void> {
--- a/ts/packages/flow/src/index.ts
+++ b/ts/packages/flow/src/index.ts
@ -79,3 +79,12 @@ export { DocumentRagService } from "./retrieval/document-rag-service.js";

 // Flow manager service
 export { FlowManagerService } from "./flow-manager/service.js";
+
+// Azure OpenAI text completion
+export { AzureOpenAIProcessor } from "./model/text-completion/azure-openai.js";
+
+// OpenAI-compatible text completion
+export { OpenAICompatibleProcessor } from "./model/text-completion/openai-compatible.js";
+
+// Mistral text completion
+export { MistralProcessor } from "./model/text-completion/mistral.js";
--- a/ts/packages/flow/src/model/text-completion/azure-openai.ts
+++ b/ts/packages/flow/src/model/text-completion/azure-openai.ts
@ -0,0 +1,156 @@
+/**
+ * Azure OpenAI text completion service.
+ *
+ * Env:
+ *   AZURE_TOKEN       (required – Azure OpenAI API key)
+ *   AZURE_ENDPOINT    (required – e.g. https://my-resource.openai.azure.com)
+ *   AZURE_MODEL       (default: gpt-4o)
+ *   AZURE_API_VERSION (default: 2024-12-01-preview)
+ */
+
+import { AzureOpenAI } from "openai";
+import {
+  LlmService,
+  type ProcessorConfig,
+  type LlmResult,
+  type LlmChunk,
+  TooManyRequestsError,
+} from "@trustgraph/base";
+
+/**
+ * Returns true when `err` is an object whose `status` property is 429
+ * (HTTP "Too Many Requests").
+ */
+function isRateLimitError(err: unknown): boolean {
+  return (
+    typeof err === "object" &&
+    err !== null &&
+    (err as { status?: unknown }).status === 429
+  );
+}
+
+/**
+ * LLM text-completion processor backed by the Azure OpenAI chat completions API.
+ *
+ * For the API key, endpoint, API version and model, an explicit constructor
+ * option wins over the matching AZURE_* environment variable, which wins over
+ * the built-in default. Temperature and max output come from the constructor
+ * options only.
+ */
+export class AzureOpenAIProcessor extends LlmService {
+  private client: AzureOpenAI;
+  private readonly defaultModel: string;
+  private readonly defaultTemperature: number;
+  private readonly maxOutput: number;
+
+  /**
+   * @param config - Base processor config plus optional Azure overrides.
+   * @throws Error when no API key or no endpoint can be resolved.
+   */
+  constructor(
+    config: ProcessorConfig & {
+      model?: string;
+      apiKey?: string;
+      endpoint?: string;
+      apiVersion?: string;
+      temperature?: number;
+      maxOutput?: number;
+    },
+  ) {
+    super(config);
+
+    this.defaultModel = config.model ?? process.env.AZURE_MODEL ?? "gpt-4o";
+    this.defaultTemperature = config.temperature ?? 0.0;
+    this.maxOutput = config.maxOutput ?? 4096;
+
+    const apiKey = config.apiKey ?? process.env.AZURE_TOKEN;
+    if (!apiKey) throw new Error("Azure OpenAI API key not specified");
+
+    const endpoint = config.endpoint ?? process.env.AZURE_ENDPOINT;
+    if (!endpoint) throw new Error("Azure OpenAI endpoint not specified");
+
+    const apiVersion =
+      config.apiVersion ??
+      process.env.AZURE_API_VERSION ??
+      "2024-12-01-preview";
+
+    this.client = new AzureOpenAI({ apiKey, apiVersion, endpoint });
+
+    console.log("[AzureOpenAI] LLM service initialized");
+  }
+
+  /**
+   * Runs one non-streaming chat completion.
+   *
+   * @param system - Content of the system message.
+   * @param prompt - Content of the user message.
+   * @param model - Model to request; defaults to the configured model.
+   * @param temperature - Sampling temperature; defaults to the configured value.
+   * @returns Text of the first choice (empty string when there is none) and
+   *          the reported token usage (0 when the response carries no usage).
+   * @throws TooManyRequestsError when the client error has status 429; any
+   *         other error is rethrown unchanged.
+   */
+  async generateContent(
+    system: string,
+    prompt: string,
+    model?: string,
+    temperature?: number,
+  ): Promise<LlmResult> {
+    const modelName = model ?? this.defaultModel;
+    const temp = temperature ?? this.defaultTemperature;
+
+    try {
+      const resp = await this.client.chat.completions.create({
+        model: modelName,
+        messages: [
+          { role: "system", content: system },
+          { role: "user", content: prompt },
+        ],
+        temperature: temp,
+        max_completion_tokens: this.maxOutput,
+      });
+
+      return {
+        // Guard against an empty `choices` array instead of throwing a TypeError.
+        text: resp.choices?.[0]?.message?.content ?? "",
+        inToken: resp.usage?.prompt_tokens ?? 0,
+        outToken: resp.usage?.completion_tokens ?? 0,
+        model: modelName,
+      };
+    } catch (err) {
+      if (isRateLimitError(err)) {
+        throw new TooManyRequestsError();
+      }
+      throw err;
+    }
+  }
+
+  /** This provider implements generateContentStream. */
+  override supportsStreaming(): boolean {
+    return true;
+  }
+
+  /**
+   * Streams a chat completion.
+   *
+   * Yields one chunk per non-empty content delta (token counts null,
+   * isFinal false), then a single closing chunk with empty text, the token
+   * totals taken from the last stream chunk that carried `usage`, and
+   * isFinal true.
+   *
+   * @param system - Content of the system message.
+   * @param prompt - Content of the user message.
+   * @param model - Model to request; defaults to the configured model.
+   * @param temperature - Sampling temperature; defaults to the configured value.
+   * @throws TooManyRequestsError when the client error has status 429; any
+   *         other error is rethrown unchanged.
+   */
+  async *generateContentStream(
+    system: string,
+    prompt: string,
+    model?: string,
+    temperature?: number,
+  ): AsyncGenerator<LlmChunk> {
+    const modelName = model ?? this.defaultModel;
+    const temp = temperature ?? this.defaultTemperature;
+
+    try {
+      const stream = await this.client.chat.completions.create({
+        model: modelName,
+        messages: [
+          { role: "system", content: system },
+          { role: "user", content: prompt },
+        ],
+        temperature: temp,
+        max_completion_tokens: this.maxOutput,
+        stream: true,
+        // Ask the API to attach token usage to the stream.
+        stream_options: { include_usage: true },
+      });
+
+      let totalInputTokens = 0;
+      let totalOutputTokens = 0;
+
+      for await (const chunk of stream) {
+        const text = chunk.choices?.[0]?.delta?.content;
+        if (text) {
+          yield {
+            text,
+            inToken: null,
+            outToken: null,
+            model: modelName,
+            isFinal: false,
+          };
+        }
+
+        if (chunk.usage) {
+          totalInputTokens = chunk.usage.prompt_tokens;
+          totalOutputTokens = chunk.usage.completion_tokens;
+        }
+      }
+
+      yield {
+        text: "",
+        inToken: totalInputTokens,
+        outToken: totalOutputTokens,
+        model: modelName,
+        isFinal: true,
+      };
+    } catch (err) {
+      if (isRateLimitError(err)) {
+        throw new TooManyRequestsError();
+      }
+      throw err;
+    }
+  }
+}
+
+/**
+ * Entry point for this module: awaits the static `launch` call on
+ * AzureOpenAIProcessor, passing "text-completion".
+ * NOTE(review): `launch` is not defined in this file (presumably inherited
+ * from LlmService) — confirm its lifecycle semantics there.
+ */
+export async function run(): Promise<void> {
+  await AzureOpenAIProcessor.launch("text-completion");
+}
--- a/ts/packages/flow/src/model/text-completion/mistral.ts
+++ b/ts/packages/flow/src/model/text-completion/mistral.ts
@ -0,0 +1,144 @@
+/**
+ * Mistral text completion service.
+ *
+ * Env:
+ *   MISTRAL_TOKEN (required – Mistral API key)
+ *   MISTRAL_MODEL (default: ministral-8b-latest)
+ */
+
+import { Mistral } from "@mistralai/mistralai";
+import {
+  LlmService,
+  type ProcessorConfig,
+  type LlmResult,
+  type LlmChunk,
+  TooManyRequestsError,
+} from "@trustgraph/base";
+
+/**
+ * Returns true when `err` is an object whose `statusCode` or `status`
+ * property is 429 (HTTP "Too Many Requests").
+ */
+function isRateLimitError(err: unknown): boolean {
+  if (typeof err !== "object" || err === null) return false;
+  const { statusCode, status } = err as {
+    statusCode?: unknown;
+    status?: unknown;
+  };
+  return statusCode === 429 || status === 429;
+}
+
+/**
+ * Normalizes message or delta content to plain text.
+ *
+ * A string is returned as-is. For an array of content chunks, the `text` of
+ * every chunk that has a string `text` field is concatenated. Anything else
+ * (null, undefined, chunks without text) contributes the empty string.
+ */
+function contentToText(content: unknown): string {
+  if (typeof content === "string") return content;
+  if (!Array.isArray(content)) return "";
+
+  return content
+    .map((part: unknown) => {
+      if (typeof part !== "object" || part === null) return "";
+      const text = (part as { text?: unknown }).text;
+      return typeof text === "string" ? text : "";
+    })
+    .join("");
+}
+
+/**
+ * LLM text-completion processor backed by the Mistral chat API.
+ *
+ * For the API key and model, an explicit constructor option wins over the
+ * matching MISTRAL_* environment variable, which wins over the built-in
+ * default. Temperature and max output come from the constructor options only.
+ */
+export class MistralProcessor extends LlmService {
+  private client: Mistral;
+  private readonly defaultModel: string;
+  private readonly defaultTemperature: number;
+  private readonly maxOutput: number;
+
+  /**
+   * @param config - Base processor config plus optional Mistral overrides.
+   * @throws Error when no API key can be resolved.
+   */
+  constructor(
+    config: ProcessorConfig & {
+      model?: string;
+      apiKey?: string;
+      temperature?: number;
+      maxOutput?: number;
+    },
+  ) {
+    super(config);
+
+    this.defaultModel =
+      config.model ?? process.env.MISTRAL_MODEL ?? "ministral-8b-latest";
+    this.defaultTemperature = config.temperature ?? 0.0;
+    this.maxOutput = config.maxOutput ?? 4096;
+
+    const apiKey = config.apiKey ?? process.env.MISTRAL_TOKEN;
+    if (!apiKey) throw new Error("Mistral API key not specified");
+
+    this.client = new Mistral({ apiKey });
+
+    console.log("[Mistral] LLM service initialized");
+  }
+
+  /**
+   * Runs one non-streaming chat completion.
+   *
+   * @param system - Content of the system message.
+   * @param prompt - Content of the user message.
+   * @param model - Model to request; defaults to the configured model.
+   * @param temperature - Sampling temperature; defaults to the configured value.
+   * @returns Text of the first choice (empty string when there is none) and
+   *          the reported token usage (0 when the response carries no usage).
+   * @throws TooManyRequestsError when the client error has statusCode or
+   *         status 429; any other error is rethrown unchanged.
+   */
+  async generateContent(
+    system: string,
+    prompt: string,
+    model?: string,
+    temperature?: number,
+  ): Promise<LlmResult> {
+    const modelName = model ?? this.defaultModel;
+    const temp = temperature ?? this.defaultTemperature;
+
+    try {
+      const resp = await this.client.chat.complete({
+        model: modelName,
+        messages: [
+          { role: "system", content: system },
+          { role: "user", content: prompt },
+        ],
+        temperature: temp,
+        maxTokens: this.maxOutput,
+      });
+
+      return {
+        // Content may be a string or an array of chunks; never pass an array through as text.
+        text: contentToText(resp.choices?.[0]?.message?.content),
+        inToken: resp.usage?.promptTokens ?? 0,
+        outToken: resp.usage?.completionTokens ?? 0,
+        model: modelName,
+      };
+    } catch (err) {
+      if (isRateLimitError(err)) {
+        throw new TooManyRequestsError();
+      }
+      throw err;
+    }
+  }
+
+  /** This provider implements generateContentStream. */
+  override supportsStreaming(): boolean {
+    return true;
+  }
+
+  /**
+   * Streams a chat completion.
+   *
+   * Yields one chunk per non-empty content delta (token counts null,
+   * isFinal false), then a single closing chunk with empty text, the token
+   * totals taken from the last stream event that carried `usage`, and
+   * isFinal true.
+   *
+   * @param system - Content of the system message.
+   * @param prompt - Content of the user message.
+   * @param model - Model to request; defaults to the configured model.
+   * @param temperature - Sampling temperature; defaults to the configured value.
+   * @throws TooManyRequestsError when the client error has statusCode or
+   *         status 429; any other error is rethrown unchanged.
+   */
+  async *generateContentStream(
+    system: string,
+    prompt: string,
+    model?: string,
+    temperature?: number,
+  ): AsyncGenerator<LlmChunk> {
+    const modelName = model ?? this.defaultModel;
+    const temp = temperature ?? this.defaultTemperature;
+
+    try {
+      const stream = await this.client.chat.stream({
+        model: modelName,
+        messages: [
+          { role: "system", content: system },
+          { role: "user", content: prompt },
+        ],
+        temperature: temp,
+        maxTokens: this.maxOutput,
+      });
+
+      let totalInputTokens = 0;
+      let totalOutputTokens = 0;
+
+      for await (const chunk of stream) {
+        const text = contentToText(chunk.data?.choices?.[0]?.delta?.content);
+        if (text) {
+          yield {
+            text,
+            inToken: null,
+            outToken: null,
+            model: modelName,
+            isFinal: false,
+          };
+        }
+
+        if (chunk.data?.usage) {
+          totalInputTokens = chunk.data.usage.promptTokens ?? 0;
+          totalOutputTokens = chunk.data.usage.completionTokens ?? 0;
+        }
+      }
+
+      yield {
+        text: "",
+        inToken: totalInputTokens,
+        outToken: totalOutputTokens,
+        model: modelName,
+        isFinal: true,
+      };
+    } catch (err) {
+      if (isRateLimitError(err)) {
+        throw new TooManyRequestsError();
+      }
+      throw err;
+    }
+  }
+}
+
+/**
+ * Entry point for this module: awaits the static `launch` call on
+ * MistralProcessor, passing "text-completion".
+ * NOTE(review): `launch` is not defined in this file (presumably inherited
+ * from LlmService) — confirm its lifecycle semantics there.
+ */
+export async function run(): Promise<void> {
+  await MistralProcessor.launch("text-completion");
+}
--- a/ts/packages/flow/src/model/text-completion/openai-compatible.ts
+++ b/ts/packages/flow/src/model/text-completion/openai-compatible.ts
@ -0,0 +1,139 @@
+/**
+ * OpenAI-compatible text completion service (generic local server).
+ *
+ * Works with LM Studio, llama.cpp, vLLM, Ollama OpenAI-compat endpoint, etc.
+ *
+ * Env:
+ *   OPENAI_COMPAT_URL   (required – e.g. http://localhost:1234/v1)
+ *   OPENAI_COMPAT_KEY   (default: sk-no-key-required)
+ *   OPENAI_COMPAT_MODEL (default: default)
+ */
+
+import OpenAI from "openai";
+import {
+  LlmService,
+  type ProcessorConfig,
+  type LlmResult,
+  type LlmChunk,
+  TooManyRequestsError,
+} from "@trustgraph/base";
+
+/**
+ * Returns true when `err` is an object whose `status` property is 429
+ * (HTTP "Too Many Requests").
+ */
+function isRateLimitError(err: unknown): boolean {
+  return (
+    typeof err === "object" &&
+    err !== null &&
+    (err as { status?: unknown }).status === 429
+  );
+}
+
+/**
+ * LLM text-completion processor for servers that expose an OpenAI-compatible
+ * chat completions endpoint.
+ *
+ * For the base URL, API key and model, an explicit constructor option wins
+ * over the matching OPENAI_COMPAT_* environment variable, which wins over the
+ * built-in default (the URL has no default). Temperature and max output come
+ * from the constructor options only.
+ */
+export class OpenAICompatibleProcessor extends LlmService {
+  private client: OpenAI;
+  private readonly defaultModel: string;
+  private readonly defaultTemperature: number;
+  private readonly maxOutput: number;
+
+  /**
+   * @param config - Base processor config plus optional server overrides.
+   * @throws Error when no server URL can be resolved.
+   */
+  constructor(
+    config: ProcessorConfig & {
+      model?: string;
+      apiKey?: string;
+      baseUrl?: string;
+      temperature?: number;
+      maxOutput?: number;
+    },
+  ) {
+    super(config);
+
+    this.defaultModel =
+      config.model ?? process.env.OPENAI_COMPAT_MODEL ?? "default";
+    this.defaultTemperature = config.temperature ?? 0.0;
+    this.maxOutput = config.maxOutput ?? 4096;
+
+    const baseURL = config.baseUrl ?? process.env.OPENAI_COMPAT_URL;
+    if (!baseURL)
+      throw new Error(
+        "OpenAI-compatible server URL not specified (set OPENAI_COMPAT_URL)",
+      );
+
+    const apiKey =
+      config.apiKey ?? process.env.OPENAI_COMPAT_KEY ?? "sk-no-key-required";
+
+    this.client = new OpenAI({ baseURL, apiKey });
+
+    console.log("[OpenAI-Compatible] LLM service initialized");
+  }
+
+  /**
+   * Runs one non-streaming chat completion.
+   *
+   * @param system - Content of the system message.
+   * @param prompt - Content of the user message.
+   * @param model - Model to request; defaults to the configured model.
+   * @param temperature - Sampling temperature; defaults to the configured value.
+   * @returns Text of the first choice (empty string when there is none) and
+   *          the reported token usage (0 when the response carries no usage).
+   * @throws TooManyRequestsError when the client error has status 429; any
+   *         other error is rethrown unchanged.
+   */
+  async generateContent(
+    system: string,
+    prompt: string,
+    model?: string,
+    temperature?: number,
+  ): Promise<LlmResult> {
+    const modelName = model ?? this.defaultModel;
+    const temp = temperature ?? this.defaultTemperature;
+
+    try {
+      const resp = await this.client.chat.completions.create({
+        model: modelName,
+        messages: [
+          { role: "system", content: system },
+          { role: "user", content: prompt },
+        ],
+        temperature: temp,
+        max_tokens: this.maxOutput,
+      });
+
+      return {
+        // Guard against an empty `choices` array instead of throwing a TypeError.
+        text: resp.choices?.[0]?.message?.content ?? "",
+        inToken: resp.usage?.prompt_tokens ?? 0,
+        outToken: resp.usage?.completion_tokens ?? 0,
+        model: modelName,
+      };
+    } catch (err) {
+      // Map rate limiting to the shared error type, as the other providers do.
+      if (isRateLimitError(err)) {
+        throw new TooManyRequestsError();
+      }
+      throw err;
+    }
+  }
+
+  /** This provider implements generateContentStream. */
+  override supportsStreaming(): boolean {
+    return true;
+  }
+
+  /**
+   * Streams a chat completion.
+   *
+   * Yields one chunk per non-empty content delta (token counts null,
+   * isFinal false), then a single closing chunk with empty text, the token
+   * totals taken from the last stream chunk that carried `usage` (0 when the
+   * server never sends one), and isFinal true.
+   *
+   * @param system - Content of the system message.
+   * @param prompt - Content of the user message.
+   * @param model - Model to request; defaults to the configured model.
+   * @param temperature - Sampling temperature; defaults to the configured value.
+   * @throws TooManyRequestsError when the client error has status 429; any
+   *         other error is rethrown unchanged.
+   */
+  async *generateContentStream(
+    system: string,
+    prompt: string,
+    model?: string,
+    temperature?: number,
+  ): AsyncGenerator<LlmChunk> {
+    const modelName = model ?? this.defaultModel;
+    const temp = temperature ?? this.defaultTemperature;
+
+    try {
+      const stream = await this.client.chat.completions.create({
+        model: modelName,
+        messages: [
+          { role: "system", content: system },
+          { role: "user", content: prompt },
+        ],
+        temperature: temp,
+        max_tokens: this.maxOutput,
+        stream: true,
+        // The chat completions API only attaches `usage` to a stream when this
+        // option is set; without it the totals below would always stay 0.
+        stream_options: { include_usage: true },
+      });
+
+      let totalInputTokens = 0;
+      let totalOutputTokens = 0;
+
+      for await (const chunk of stream) {
+        const text = chunk.choices?.[0]?.delta?.content;
+        if (text) {
+          yield {
+            text,
+            inToken: null,
+            outToken: null,
+            model: modelName,
+            isFinal: false,
+          };
+        }
+
+        if (chunk.usage) {
+          totalInputTokens = chunk.usage.prompt_tokens;
+          totalOutputTokens = chunk.usage.completion_tokens;
+        }
+      }
+
+      yield {
+        text: "",
+        inToken: totalInputTokens,
+        outToken: totalOutputTokens,
+        model: modelName,
+        isFinal: true,
+      };
+    } catch (err) {
+      // Map rate limiting to the shared error type, as the other providers do.
+      if (isRateLimitError(err)) {
+        throw new TooManyRequestsError();
+      }
+      throw err;
+    }
+  }
+}
+
+/**
+ * Entry point for this module: awaits the static `launch` call on
+ * OpenAICompatibleProcessor, passing "text-completion".
+ * NOTE(review): `launch` is not defined in this file (presumably inherited
+ * from LlmService) — confirm its lifecycle semantics there.
+ */
+export async function run(): Promise<void> {
+  await OpenAICompatibleProcessor.launch("text-completion");
+}