feat: add Docker entrypoints, LLM providers, pipeline hardening, workbench pages

Phase 9 — four parallel workstreams:

- Stream A: 14 Docker entrypoints for containerized deployment
- Stream B: Pipeline hardening — robust JSON parsing, LLM retry logic, consumer negative-ack, FalkorDB test import fix
- Stream C: Azure OpenAI, OpenAI-compatible, and Mistral LLM providers
- Stream D: Workbench Prompts, Token Cost, Knowledge Cores pages + Settings feature switches

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
2026-07-01 17:39:39 +02:00 · 2026-04-07 03:22:55 -05:00 · 2026-04-07 03:22:55 -05:00 · c7eefee607
commit c7eefee607
parent 50fb311d2d
34 changed files with 1457 additions and 112 deletions
--- a/ts/packages/flow/src/model/text-completion/azure-openai.ts
+++ b/ts/packages/flow/src/model/text-completion/azure-openai.ts
@ -0,0 +1,156 @@
+/**
+ * Azure OpenAI text completion service.
+ *
+ * Env:
+ *   AZURE_TOKEN       (required – Azure OpenAI API key)
+ *   AZURE_ENDPOINT    (required – e.g. https://my-resource.openai.azure.com)
+ *   AZURE_MODEL       (default: gpt-4o)
+ *   AZURE_API_VERSION (default: 2024-12-01-preview)
+ */
+
+import { AzureOpenAI } from "openai";
+import {
+  LlmService,
+  type ProcessorConfig,
+  type LlmResult,
+  type LlmChunk,
+  TooManyRequestsError,
+} from "@trustgraph/base";
+
+/**
+ * Text completion processor that talks to Azure OpenAI through the `openai`
+ * SDK's `AzureOpenAI` client.
+ *
+ * Model, API key, endpoint and API version are taken from `config` first and
+ * from the `AZURE_*` environment variables second; temperature and output
+ * limit come from `config` only. Failures carrying HTTP status 429 are
+ * rethrown as `TooManyRequestsError`; every other error propagates unchanged.
+ */
+export class AzureOpenAIProcessor extends LlmService {
+  private readonly client: AzureOpenAI;
+  private readonly defaultModel: string;
+  private readonly defaultTemperature: number;
+  private readonly maxOutput: number;
+
+  /**
+   * @param config Base processor configuration plus optional overrides.
+   *   - `model`: falls back to `AZURE_MODEL`, then `"gpt-4o"`.
+   *   - `apiKey`: falls back to `AZURE_TOKEN`.
+   *   - `endpoint`: falls back to `AZURE_ENDPOINT`.
+   *   - `apiVersion`: falls back to `AZURE_API_VERSION`, then
+   *     `"2024-12-01-preview"`.
+   *   - `temperature`: defaults to `0.0`.
+   *   - `maxOutput`: defaults to `4096`; sent as `max_completion_tokens`.
+   * @throws Error when no API key or no endpoint can be resolved.
+   */
+  constructor(
+    config: ProcessorConfig & {
+      model?: string;
+      apiKey?: string;
+      endpoint?: string;
+      apiVersion?: string;
+      temperature?: number;
+      maxOutput?: number;
+    },
+  ) {
+    super(config);
+
+    this.defaultModel = config.model ?? process.env.AZURE_MODEL ?? "gpt-4o";
+    this.defaultTemperature = config.temperature ?? 0.0;
+    this.maxOutput = config.maxOutput ?? 4096;
+
+    const apiKey = config.apiKey ?? process.env.AZURE_TOKEN;
+    if (!apiKey) throw new Error("Azure OpenAI API key not specified");
+
+    const endpoint = config.endpoint ?? process.env.AZURE_ENDPOINT;
+    if (!endpoint) throw new Error("Azure OpenAI endpoint not specified");
+
+    const apiVersion =
+      config.apiVersion ??
+      process.env.AZURE_API_VERSION ??
+      "2024-12-01-preview";
+
+    this.client = new AzureOpenAI({ apiKey, apiVersion, endpoint });
+
+    console.log("[AzureOpenAI] LLM service initialized");
+  }
+
+  /**
+   * Reports whether a caught value carries `status === 429`.
+   * Accepts `unknown` so the catch blocks need no `any` cast.
+   */
+  private static isRateLimited(err: unknown): boolean {
+    const status = (err as { status?: unknown } | null | undefined)?.status;
+    return status === 429;
+  }
+
+  /**
+   * Runs a single (non-streaming) chat completion.
+   *
+   * @param system System message content.
+   * @param prompt User message content.
+   * @param model Model name; defaults to the configured model.
+   * @param temperature Sampling temperature; defaults to the configured one.
+   * @returns The first choice's text (empty string when the response has no
+   *   choice or no content) and the reported token counts (0 when the
+   *   response has no usage block).
+   * @throws TooManyRequestsError when the request fails with status 429.
+   */
+  async generateContent(
+    system: string,
+    prompt: string,
+    model?: string,
+    temperature?: number,
+  ): Promise<LlmResult> {
+    const modelName = model ?? this.defaultModel;
+    const temp = temperature ?? this.defaultTemperature;
+
+    try {
+      const resp = await this.client.chat.completions.create({
+        model: modelName,
+        messages: [
+          { role: "system", content: system },
+          { role: "user", content: prompt },
+        ],
+        temperature: temp,
+        max_completion_tokens: this.maxOutput,
+      });
+
+      return {
+        // Guard the index: an empty `choices` array must not raise TypeError.
+        text: resp.choices[0]?.message?.content ?? "",
+        inToken: resp.usage?.prompt_tokens ?? 0,
+        outToken: resp.usage?.completion_tokens ?? 0,
+        model: modelName,
+      };
+    } catch (err) {
+      if (AzureOpenAIProcessor.isRateLimited(err)) {
+        throw new TooManyRequestsError();
+      }
+      throw err;
+    }
+  }
+
+  /** This provider implements `generateContentStream`. */
+  override supportsStreaming(): boolean {
+    return true;
+  }
+
+  /**
+   * Runs a streaming chat completion.
+   *
+   * Yields one non-final chunk (token counts `null`) for every non-empty
+   * content delta, then exactly one final chunk with empty text carrying the
+   * last token usage seen on the stream (0 if usage was never reported).
+   *
+   * @param system System message content.
+   * @param prompt User message content.
+   * @param model Model name; defaults to the configured model.
+   * @param temperature Sampling temperature; defaults to the configured one.
+   * @throws TooManyRequestsError when the request or stream fails with
+   *   status 429.
+   */
+  async *generateContentStream(
+    system: string,
+    prompt: string,
+    model?: string,
+    temperature?: number,
+  ): AsyncGenerator<LlmChunk> {
+    const modelName = model ?? this.defaultModel;
+    const temp = temperature ?? this.defaultTemperature;
+
+    try {
+      const stream = await this.client.chat.completions.create({
+        model: modelName,
+        messages: [
+          { role: "system", content: system },
+          { role: "user", content: prompt },
+        ],
+        temperature: temp,
+        max_completion_tokens: this.maxOutput,
+        stream: true,
+        // Ask the service to include token usage in the stream.
+        stream_options: { include_usage: true },
+      });
+
+      let totalInputTokens = 0;
+      let totalOutputTokens = 0;
+
+      for await (const chunk of stream) {
+        const text = chunk.choices?.[0]?.delta?.content;
+        if (text) {
+          yield {
+            text,
+            inToken: null,
+            outToken: null,
+            model: modelName,
+            isFinal: false,
+          };
+        }
+
+        if (chunk.usage) {
+          totalInputTokens = chunk.usage.prompt_tokens;
+          totalOutputTokens = chunk.usage.completion_tokens;
+        }
+      }
+
+      yield {
+        text: "",
+        inToken: totalInputTokens,
+        outToken: totalOutputTokens,
+        model: modelName,
+        isFinal: true,
+      };
+    } catch (err) {
+      if (AzureOpenAIProcessor.isRateLimited(err)) {
+        throw new TooManyRequestsError();
+      }
+      throw err;
+    }
+  }
+}
+
+/**
+ * Module entry point: awaits `AzureOpenAIProcessor.launch` with the
+ * argument "text-completion".
+ */
+export async function run(): Promise<void> {
+  const launchArg = "text-completion";
+  await AzureOpenAIProcessor.launch(launchArg);
+}
--- a/ts/packages/flow/src/model/text-completion/mistral.ts
+++ b/ts/packages/flow/src/model/text-completion/mistral.ts
@ -0,0 +1,144 @@
+/**
+ * Mistral text completion service.
+ *
+ * Env:
+ *   MISTRAL_TOKEN (required – Mistral API key)
+ *   MISTRAL_MODEL (default: ministral-8b-latest)
+ */
+
+import { Mistral } from "@mistralai/mistralai";
+import {
+  LlmService,
+  type ProcessorConfig,
+  type LlmResult,
+  type LlmChunk,
+  TooManyRequestsError,
+} from "@trustgraph/base";
+
+/**
+ * Text completion processor backed by the Mistral SDK client.
+ *
+ * Model and API key are taken from `config` first and from the `MISTRAL_*`
+ * environment variables second; temperature and output limit come from
+ * `config` only. Failures carrying HTTP status 429 (as `statusCode` or
+ * `status`) are rethrown as `TooManyRequestsError`; every other error
+ * propagates unchanged.
+ */
+export class MistralProcessor extends LlmService {
+  private readonly client: Mistral;
+  private readonly defaultModel: string;
+  private readonly defaultTemperature: number;
+  private readonly maxOutput: number;
+
+  /**
+   * @param config Base processor configuration plus optional overrides.
+   *   - `model`: falls back to `MISTRAL_MODEL`, then `"ministral-8b-latest"`.
+   *   - `apiKey`: falls back to `MISTRAL_TOKEN`.
+   *   - `temperature`: defaults to `0.0`.
+   *   - `maxOutput`: defaults to `4096`; sent as `maxTokens`.
+   * @throws Error when no API key can be resolved.
+   */
+  constructor(
+    config: ProcessorConfig & {
+      model?: string;
+      apiKey?: string;
+      temperature?: number;
+      maxOutput?: number;
+    },
+  ) {
+    super(config);
+
+    this.defaultModel =
+      config.model ?? process.env.MISTRAL_MODEL ?? "ministral-8b-latest";
+    this.defaultTemperature = config.temperature ?? 0.0;
+    this.maxOutput = config.maxOutput ?? 4096;
+
+    const apiKey = config.apiKey ?? process.env.MISTRAL_TOKEN;
+    if (!apiKey) throw new Error("Mistral API key not specified");
+
+    this.client = new Mistral({ apiKey });
+
+    console.log("[Mistral] LLM service initialized");
+  }
+
+  /**
+   * Reports whether a caught value carries `statusCode === 429` or
+   * `status === 429`. Accepts `unknown` so catch blocks need no `any` cast.
+   */
+  private static isRateLimited(err: unknown): boolean {
+    const probe = err as
+      | { statusCode?: unknown; status?: unknown }
+      | null
+      | undefined;
+    return probe?.statusCode === 429 || probe?.status === 429;
+  }
+
+  /**
+   * Flattens message content to plain text.
+   *
+   * A string is returned as-is. For an array, the `text` of every part that
+   * has a string `text` and whose `type` is absent or `"text"` is
+   * concatenated in order; other parts are skipped. Anything else (including
+   * `null`/`undefined`) becomes the empty string. This replaces a blind
+   * `as string` assertion that let an array leak into the result text.
+   */
+  private static contentToText(content: unknown): string {
+    if (typeof content === "string") return content;
+    if (!Array.isArray(content)) return "";
+
+    let text = "";
+    for (const part of content as unknown[]) {
+      if (typeof part !== "object" || part === null) continue;
+      const { type, text: piece } = part as {
+        type?: unknown;
+        text?: unknown;
+      };
+      if ((type === undefined || type === "text") && typeof piece === "string") {
+        text += piece;
+      }
+    }
+    return text;
+  }
+
+  /**
+   * Runs a single (non-streaming) chat completion.
+   *
+   * @param system System message content.
+   * @param prompt User message content.
+   * @param model Model name; defaults to the configured model.
+   * @param temperature Sampling temperature; defaults to the configured one.
+   * @returns The first choice's text (empty string when absent) and the
+   *   reported token counts (0 when the response has no usage values).
+   * @throws TooManyRequestsError when the request fails with status 429.
+   */
+  async generateContent(
+    system: string,
+    prompt: string,
+    model?: string,
+    temperature?: number,
+  ): Promise<LlmResult> {
+    const modelName = model ?? this.defaultModel;
+    const temp = temperature ?? this.defaultTemperature;
+
+    try {
+      const resp = await this.client.chat.complete({
+        model: modelName,
+        messages: [
+          { role: "system", content: system },
+          { role: "user", content: prompt },
+        ],
+        temperature: temp,
+        maxTokens: this.maxOutput,
+      });
+
+      return {
+        text: MistralProcessor.contentToText(
+          resp.choices?.[0]?.message?.content,
+        ),
+        inToken: resp.usage?.promptTokens ?? 0,
+        outToken: resp.usage?.completionTokens ?? 0,
+        model: modelName,
+      };
+    } catch (err) {
+      if (MistralProcessor.isRateLimited(err)) {
+        throw new TooManyRequestsError();
+      }
+      throw err;
+    }
+  }
+
+  /** This provider implements `generateContentStream`. */
+  override supportsStreaming(): boolean {
+    return true;
+  }
+
+  /**
+   * Runs a streaming chat completion.
+   *
+   * Yields one non-final chunk (token counts `null`) for every delta whose
+   * flattened text is non-empty, then exactly one final chunk with empty
+   * text carrying the last token usage seen on the stream (0 if usage was
+   * never reported).
+   *
+   * @param system System message content.
+   * @param prompt User message content.
+   * @param model Model name; defaults to the configured model.
+   * @param temperature Sampling temperature; defaults to the configured one.
+   * @throws TooManyRequestsError when the request or stream fails with
+   *   status 429.
+   */
+  async *generateContentStream(
+    system: string,
+    prompt: string,
+    model?: string,
+    temperature?: number,
+  ): AsyncGenerator<LlmChunk> {
+    const modelName = model ?? this.defaultModel;
+    const temp = temperature ?? this.defaultTemperature;
+
+    try {
+      const stream = await this.client.chat.stream({
+        model: modelName,
+        messages: [
+          { role: "system", content: system },
+          { role: "user", content: prompt },
+        ],
+        temperature: temp,
+        maxTokens: this.maxOutput,
+      });
+
+      let totalInputTokens = 0;
+      let totalOutputTokens = 0;
+
+      for await (const chunk of stream) {
+        const text = MistralProcessor.contentToText(
+          chunk.data?.choices?.[0]?.delta?.content,
+        );
+        if (text) {
+          yield {
+            text,
+            inToken: null,
+            outToken: null,
+            model: modelName,
+            isFinal: false,
+          };
+        }
+
+        if (chunk.data?.usage) {
+          totalInputTokens = chunk.data.usage.promptTokens ?? 0;
+          totalOutputTokens = chunk.data.usage.completionTokens ?? 0;
+        }
+      }
+
+      yield {
+        text: "",
+        inToken: totalInputTokens,
+        outToken: totalOutputTokens,
+        model: modelName,
+        isFinal: true,
+      };
+    } catch (err) {
+      if (MistralProcessor.isRateLimited(err)) {
+        throw new TooManyRequestsError();
+      }
+      throw err;
+    }
+  }
+}
+
+/**
+ * Module entry point: awaits `MistralProcessor.launch` with the argument
+ * "text-completion".
+ */
+export async function run(): Promise<void> {
+  const launchArg = "text-completion";
+  await MistralProcessor.launch(launchArg);
+}
--- a/ts/packages/flow/src/model/text-completion/openai-compatible.ts
+++ b/ts/packages/flow/src/model/text-completion/openai-compatible.ts
@ -0,0 +1,139 @@
+/**
+ * OpenAI-compatible text completion service (generic local server).
+ *
+ * Works with LM Studio, llama.cpp, vLLM, Ollama OpenAI-compat endpoint, etc.
+ *
+ * Env:
+ *   OPENAI_COMPAT_URL   (required – e.g. http://localhost:1234/v1)
+ *   OPENAI_COMPAT_KEY   (default: sk-no-key-required)
+ *   OPENAI_COMPAT_MODEL (default: default)
+ */
+
+import OpenAI from "openai";
+import {
+  LlmService,
+  type ProcessorConfig,
+  type LlmResult,
+  type LlmChunk,
+  TooManyRequestsError,
+} from "@trustgraph/base";
+
+/**
+ * Text completion processor for any server exposing an OpenAI-compatible
+ * chat completions API, accessed through the `openai` SDK client with a
+ * custom base URL.
+ *
+ * Model, base URL and API key are taken from `config` first and from the
+ * `OPENAI_COMPAT_*` environment variables second; temperature and output
+ * limit come from `config` only. Failures carrying HTTP status 429 are
+ * rethrown as `TooManyRequestsError`; every other error propagates unchanged.
+ */
+export class OpenAICompatibleProcessor extends LlmService {
+  private readonly client: OpenAI;
+  private readonly defaultModel: string;
+  private readonly defaultTemperature: number;
+  private readonly maxOutput: number;
+
+  /**
+   * @param config Base processor configuration plus optional overrides.
+   *   - `model`: falls back to `OPENAI_COMPAT_MODEL`, then `"default"`.
+   *   - `baseUrl`: falls back to `OPENAI_COMPAT_URL`.
+   *   - `apiKey`: falls back to `OPENAI_COMPAT_KEY`, then
+   *     `"sk-no-key-required"`.
+   *   - `temperature`: defaults to `0.0`.
+   *   - `maxOutput`: defaults to `4096`; sent as `max_tokens`.
+   * @throws Error when no server URL can be resolved.
+   */
+  constructor(
+    config: ProcessorConfig & {
+      model?: string;
+      apiKey?: string;
+      baseUrl?: string;
+      temperature?: number;
+      maxOutput?: number;
+    },
+  ) {
+    super(config);
+
+    this.defaultModel =
+      config.model ?? process.env.OPENAI_COMPAT_MODEL ?? "default";
+    this.defaultTemperature = config.temperature ?? 0.0;
+    this.maxOutput = config.maxOutput ?? 4096;
+
+    const baseURL = config.baseUrl ?? process.env.OPENAI_COMPAT_URL;
+    if (!baseURL)
+      throw new Error(
+        "OpenAI-compatible server URL not specified (set OPENAI_COMPAT_URL)",
+      );
+
+    const apiKey =
+      config.apiKey ?? process.env.OPENAI_COMPAT_KEY ?? "sk-no-key-required";
+
+    this.client = new OpenAI({ baseURL, apiKey });
+
+    console.log("[OpenAI-Compatible] LLM service initialized");
+  }
+
+  /**
+   * Reports whether a caught value carries `status === 429`.
+   * Accepts `unknown` so the catch blocks need no `any` cast.
+   */
+  private static isRateLimited(err: unknown): boolean {
+    const status = (err as { status?: unknown } | null | undefined)?.status;
+    return status === 429;
+  }
+
+  /**
+   * Runs a single (non-streaming) chat completion.
+   *
+   * @param system System message content.
+   * @param prompt User message content.
+   * @param model Model name; defaults to the configured model.
+   * @param temperature Sampling temperature; defaults to the configured one.
+   * @returns The first choice's text (empty string when the response has no
+   *   choice or no content) and the reported token counts (0 when the
+   *   response has no usage block).
+   * @throws TooManyRequestsError when the request fails with status 429.
+   */
+  async generateContent(
+    system: string,
+    prompt: string,
+    model?: string,
+    temperature?: number,
+  ): Promise<LlmResult> {
+    const modelName = model ?? this.defaultModel;
+    const temp = temperature ?? this.defaultTemperature;
+
+    try {
+      const resp = await this.client.chat.completions.create({
+        model: modelName,
+        messages: [
+          { role: "system", content: system },
+          { role: "user", content: prompt },
+        ],
+        temperature: temp,
+        max_tokens: this.maxOutput,
+      });
+
+      return {
+        // Guard the index: an empty `choices` array must not raise TypeError.
+        text: resp.choices[0]?.message?.content ?? "",
+        inToken: resp.usage?.prompt_tokens ?? 0,
+        outToken: resp.usage?.completion_tokens ?? 0,
+        model: modelName,
+      };
+    } catch (err) {
+      if (OpenAICompatibleProcessor.isRateLimited(err)) {
+        throw new TooManyRequestsError();
+      }
+      throw err;
+    }
+  }
+
+  /** This provider implements `generateContentStream`. */
+  override supportsStreaming(): boolean {
+    return true;
+  }
+
+  /**
+   * Runs a streaming chat completion.
+   *
+   * Yields one non-final chunk (token counts `null`) for every non-empty
+   * content delta, then exactly one final chunk with empty text carrying the
+   * last token usage seen on the stream (0 if the server never reported
+   * usage).
+   *
+   * @param system System message content.
+   * @param prompt User message content.
+   * @param model Model name; defaults to the configured model.
+   * @param temperature Sampling temperature; defaults to the configured one.
+   * @throws TooManyRequestsError when the request or stream fails with
+   *   status 429.
+   */
+  async *generateContentStream(
+    system: string,
+    prompt: string,
+    model?: string,
+    temperature?: number,
+  ): AsyncGenerator<LlmChunk> {
+    const modelName = model ?? this.defaultModel;
+    const temp = temperature ?? this.defaultTemperature;
+
+    try {
+      const stream = await this.client.chat.completions.create({
+        model: modelName,
+        messages: [
+          { role: "system", content: system },
+          { role: "user", content: prompt },
+        ],
+        temperature: temp,
+        max_tokens: this.maxOutput,
+        stream: true,
+        // Without this option OpenAI-style servers send no usage on the
+        // stream, leaving the final chunk's token totals at 0.
+        // NOTE(review): confirm the targeted local servers accept this field.
+        stream_options: { include_usage: true },
+      });
+
+      let totalInputTokens = 0;
+      let totalOutputTokens = 0;
+
+      for await (const chunk of stream) {
+        const text = chunk.choices?.[0]?.delta?.content;
+        if (text) {
+          yield {
+            text,
+            inToken: null,
+            outToken: null,
+            model: modelName,
+            isFinal: false,
+          };
+        }
+
+        if (chunk.usage) {
+          totalInputTokens = chunk.usage.prompt_tokens;
+          totalOutputTokens = chunk.usage.completion_tokens;
+        }
+      }
+
+      yield {
+        text: "",
+        inToken: totalInputTokens,
+        outToken: totalOutputTokens,
+        model: modelName,
+        isFinal: true,
+      };
+    } catch (err) {
+      if (OpenAICompatibleProcessor.isRateLimited(err)) {
+        throw new TooManyRequestsError();
+      }
+      throw err;
+    }
+  }
+}
+
+/**
+ * Module entry point: awaits `OpenAICompatibleProcessor.launch` with the
+ * argument "text-completion".
+ */
+export async function run(): Promise<void> {
+  const launchArg = "text-completion";
+  await OpenAICompatibleProcessor.launch(launchArg);
+}