mirror of
https://github.com/trustgraph-ai/trustgraph.git
synced 2026-07-01 09:29:38 +02:00
feat: add Docker entrypoints, LLM providers, pipeline hardening, workbench pages
Phase 9 — four parallel workstreams: - Stream A: 14 Docker entrypoints for containerized deployment - Stream B: Pipeline hardening — robust JSON parsing, LLM retry logic, consumer negative-ack, FalkorDB test import fix - Stream C: Azure OpenAI, OpenAI-compatible, and Mistral LLM providers - Stream D: Workbench Prompts, Token Cost, Knowledge Cores pages + Settings feature switches Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
parent
50fb311d2d
commit
c7eefee607
34 changed files with 1457 additions and 112 deletions
|
|
@ -18,6 +18,7 @@
|
|||
"falkordb": "^5.0.0",
|
||||
"fastify": "^5.2.0",
|
||||
"ollama": "^0.6.3",
|
||||
"@mistralai/mistralai": "^1.0.0",
|
||||
"openai": "^4.85.0",
|
||||
"pdfjs-dist": "^5.6.205"
|
||||
},
|
||||
|
|
|
|||
|
|
@ -93,64 +93,71 @@ export class KnowledgeExtractService extends FlowProcessor {
|
|||
|
||||
// --- Extract relationships ---
|
||||
try {
|
||||
const relPrompt = await promptClient.request({
|
||||
name: "extract-relationships",
|
||||
variables: { text },
|
||||
});
|
||||
const relPrompt = await promptClient.request(
|
||||
{ name: "extract-relationships", variables: { text } },
|
||||
{ timeoutMs: 10_000 },
|
||||
);
|
||||
|
||||
if (!relPrompt.error) {
|
||||
const relCompletion = await llmClient.request({
|
||||
system: relPrompt.system,
|
||||
prompt: relPrompt.prompt,
|
||||
});
|
||||
let relationships: ExtractedRelationship[] | null = null;
|
||||
for (let attempt = 0; attempt < 3; attempt++) {
|
||||
const relCompletion = await llmClient.request(
|
||||
{ system: relPrompt.system, prompt: relPrompt.prompt },
|
||||
{ timeoutMs: 120_000 },
|
||||
);
|
||||
|
||||
if (!relCompletion.error && relCompletion.response) {
|
||||
const relationships = parseJsonResponse<ExtractedRelationship[]>(relCompletion.response);
|
||||
|
||||
if (relationships) {
|
||||
for (const rel of relationships) {
|
||||
if (!rel.subject || !rel.predicate || !rel.object) continue;
|
||||
|
||||
const subjectIri = toEntityIri(rel.subject);
|
||||
const predicateIri = toEntityIri(rel.predicate);
|
||||
const objectIri = toEntityIri(rel.object);
|
||||
|
||||
// Main relationship triple
|
||||
allTriples.push({ s: subjectIri, p: predicateIri, o: objectIri });
|
||||
|
||||
// rdfs:label triples for each entity
|
||||
allTriples.push({
|
||||
s: subjectIri,
|
||||
p: iriTerm(RDFS_LABEL),
|
||||
o: literalTerm(rel.subject),
|
||||
});
|
||||
allTriples.push({
|
||||
s: predicateIri,
|
||||
p: iriTerm(RDFS_LABEL),
|
||||
o: literalTerm(rel.predicate),
|
||||
});
|
||||
allTriples.push({
|
||||
s: objectIri,
|
||||
p: iriTerm(RDFS_LABEL),
|
||||
o: literalTerm(rel.object),
|
||||
});
|
||||
|
||||
// Entity contexts for subject and object
|
||||
allEntityContexts.push({
|
||||
entity: subjectIri,
|
||||
context: text,
|
||||
chunkId: msg.documentId,
|
||||
});
|
||||
allEntityContexts.push({
|
||||
entity: objectIri,
|
||||
context: text,
|
||||
chunkId: msg.documentId,
|
||||
});
|
||||
}
|
||||
|
||||
console.log(`[KnowledgeExtract] Extracted ${relationships.length} relationships`);
|
||||
if (!relCompletion.error && relCompletion.response) {
|
||||
relationships = parseJsonResponse<ExtractedRelationship[]>(relCompletion.response);
|
||||
if (relationships) break;
|
||||
console.warn(`[KnowledgeExtract] Relationship parse failed, attempt ${attempt + 1}/3`);
|
||||
} else {
|
||||
break; // LLM error, don't retry
|
||||
}
|
||||
}
|
||||
|
||||
if (relationships) {
|
||||
for (const rel of relationships) {
|
||||
if (!rel.subject || !rel.predicate || !rel.object) continue;
|
||||
|
||||
const subjectIri = toEntityIri(rel.subject);
|
||||
const predicateIri = toEntityIri(rel.predicate);
|
||||
const objectIri = toEntityIri(rel.object);
|
||||
|
||||
// Main relationship triple
|
||||
allTriples.push({ s: subjectIri, p: predicateIri, o: objectIri });
|
||||
|
||||
// rdfs:label triples for each entity
|
||||
allTriples.push({
|
||||
s: subjectIri,
|
||||
p: iriTerm(RDFS_LABEL),
|
||||
o: literalTerm(rel.subject),
|
||||
});
|
||||
allTriples.push({
|
||||
s: predicateIri,
|
||||
p: iriTerm(RDFS_LABEL),
|
||||
o: literalTerm(rel.predicate),
|
||||
});
|
||||
allTriples.push({
|
||||
s: objectIri,
|
||||
p: iriTerm(RDFS_LABEL),
|
||||
o: literalTerm(rel.object),
|
||||
});
|
||||
|
||||
// Entity contexts for subject and object
|
||||
allEntityContexts.push({
|
||||
entity: subjectIri,
|
||||
context: text,
|
||||
chunkId: msg.documentId,
|
||||
});
|
||||
allEntityContexts.push({
|
||||
entity: objectIri,
|
||||
context: text,
|
||||
chunkId: msg.documentId,
|
||||
});
|
||||
}
|
||||
|
||||
console.log(`[KnowledgeExtract] Extracted ${relationships.length} relationships`);
|
||||
}
|
||||
}
|
||||
} catch (err) {
|
||||
console.error("[KnowledgeExtract] Relationship extraction failed:", err);
|
||||
|
|
@ -158,51 +165,58 @@ export class KnowledgeExtractService extends FlowProcessor {
|
|||
|
||||
// --- Extract definitions ---
|
||||
try {
|
||||
const defPrompt = await promptClient.request({
|
||||
name: "extract-definitions",
|
||||
variables: { text },
|
||||
});
|
||||
const defPrompt = await promptClient.request(
|
||||
{ name: "extract-definitions", variables: { text } },
|
||||
{ timeoutMs: 10_000 },
|
||||
);
|
||||
|
||||
if (!defPrompt.error) {
|
||||
const defCompletion = await llmClient.request({
|
||||
system: defPrompt.system,
|
||||
prompt: defPrompt.prompt,
|
||||
});
|
||||
let definitions: ExtractedDefinition[] | null = null;
|
||||
for (let attempt = 0; attempt < 3; attempt++) {
|
||||
const defCompletion = await llmClient.request(
|
||||
{ system: defPrompt.system, prompt: defPrompt.prompt },
|
||||
{ timeoutMs: 120_000 },
|
||||
);
|
||||
|
||||
if (!defCompletion.error && defCompletion.response) {
|
||||
const definitions = parseJsonResponse<ExtractedDefinition[]>(defCompletion.response);
|
||||
|
||||
if (definitions) {
|
||||
for (const def of definitions) {
|
||||
if (!def.entity || !def.definition) continue;
|
||||
|
||||
const entityIri = toEntityIri(def.entity);
|
||||
|
||||
// Definition triple
|
||||
allTriples.push({
|
||||
s: entityIri,
|
||||
p: iriTerm(SKOS_DEFINITION),
|
||||
o: literalTerm(def.definition),
|
||||
});
|
||||
|
||||
// Label triple
|
||||
allTriples.push({
|
||||
s: entityIri,
|
||||
p: iriTerm(RDFS_LABEL),
|
||||
o: literalTerm(def.entity),
|
||||
});
|
||||
|
||||
// Entity context
|
||||
allEntityContexts.push({
|
||||
entity: entityIri,
|
||||
context: text,
|
||||
chunkId: msg.documentId,
|
||||
});
|
||||
}
|
||||
|
||||
console.log(`[KnowledgeExtract] Extracted ${definitions.length} definitions`);
|
||||
if (!defCompletion.error && defCompletion.response) {
|
||||
definitions = parseJsonResponse<ExtractedDefinition[]>(defCompletion.response);
|
||||
if (definitions) break;
|
||||
console.warn(`[KnowledgeExtract] Definition parse failed, attempt ${attempt + 1}/3`);
|
||||
} else {
|
||||
break; // LLM error, don't retry
|
||||
}
|
||||
}
|
||||
|
||||
if (definitions) {
|
||||
for (const def of definitions) {
|
||||
if (!def.entity || !def.definition) continue;
|
||||
|
||||
const entityIri = toEntityIri(def.entity);
|
||||
|
||||
// Definition triple
|
||||
allTriples.push({
|
||||
s: entityIri,
|
||||
p: iriTerm(SKOS_DEFINITION),
|
||||
o: literalTerm(def.definition),
|
||||
});
|
||||
|
||||
// Label triple
|
||||
allTriples.push({
|
||||
s: entityIri,
|
||||
p: iriTerm(RDFS_LABEL),
|
||||
o: literalTerm(def.entity),
|
||||
});
|
||||
|
||||
// Entity context
|
||||
allEntityContexts.push({
|
||||
entity: entityIri,
|
||||
context: text,
|
||||
chunkId: msg.documentId,
|
||||
});
|
||||
}
|
||||
|
||||
console.log(`[KnowledgeExtract] Extracted ${definitions.length} definitions`);
|
||||
}
|
||||
}
|
||||
} catch (err) {
|
||||
console.error("[KnowledgeExtract] Definition extraction failed:", err);
|
||||
|
|
@ -245,23 +259,49 @@ function literalTerm(value: string): Term {
|
|||
|
||||
/**
|
||||
* Parse JSON from LLM output, handling markdown code fences and malformed output.
|
||||
* Uses progressive fallback: direct parse, array extraction, truncated array repair, single object wrap.
|
||||
*/
|
||||
function parseJsonResponse<T>(raw: string): T | null {
|
||||
try {
|
||||
// Strip markdown code fences
|
||||
let cleaned = raw.trim();
|
||||
|
||||
// Remove ```json ... ``` or ``` ... ```
|
||||
const fenceMatch = cleaned.match(/^```(?:json)?\s*\n?([\s\S]*?)\n?```$/);
|
||||
if (fenceMatch) {
|
||||
cleaned = fenceMatch[1].trim();
|
||||
}
|
||||
|
||||
return JSON.parse(cleaned) as T;
|
||||
} catch {
|
||||
console.warn("[KnowledgeExtract] Failed to parse JSON from LLM response:", raw.slice(0, 200));
|
||||
return null;
|
||||
// Attempt 1: direct parse after stripping fences
|
||||
let cleaned = raw.trim();
|
||||
const fenceMatch = cleaned.match(/^```(?:json)?\s*\n?([\s\S]*?)\n?```$/);
|
||||
if (fenceMatch) {
|
||||
cleaned = fenceMatch[1].trim();
|
||||
}
|
||||
|
||||
try {
|
||||
return JSON.parse(cleaned) as T;
|
||||
} catch { /* fall through */ }
|
||||
|
||||
// Attempt 2: extract first JSON array from the text
|
||||
const arrayMatch = cleaned.match(/\[[\s\S]*\]/);
|
||||
if (arrayMatch) {
|
||||
try {
|
||||
return JSON.parse(arrayMatch[0]) as T;
|
||||
} catch { /* fall through */ }
|
||||
|
||||
// Attempt 3: try to fix truncated array by closing it after the last complete object
|
||||
const partial = arrayMatch[0];
|
||||
const lastBrace = partial.lastIndexOf('}');
|
||||
if (lastBrace > 0) {
|
||||
const truncated = partial.slice(0, lastBrace + 1) + ']';
|
||||
try {
|
||||
return JSON.parse(truncated) as T;
|
||||
} catch { /* fall through */ }
|
||||
}
|
||||
}
|
||||
|
||||
// Attempt 4: extract first JSON object, wrap in array
|
||||
const objMatch = cleaned.match(/\{[\s\S]*?\}/);
|
||||
if (objMatch) {
|
||||
try {
|
||||
const obj = JSON.parse(objMatch[0]);
|
||||
return [obj] as unknown as T;
|
||||
} catch { /* fall through */ }
|
||||
}
|
||||
|
||||
console.warn("[KnowledgeExtract] Failed to parse JSON from LLM response:", raw.slice(0, 300));
|
||||
return null;
|
||||
}
|
||||
|
||||
export async function run(): Promise<void> {
|
||||
|
|
|
|||
|
|
@ -79,3 +79,12 @@ export { DocumentRagService } from "./retrieval/document-rag-service.js";
|
|||
|
||||
// Flow manager service
|
||||
export { FlowManagerService } from "./flow-manager/service.js";
|
||||
|
||||
// Azure OpenAI text completion
|
||||
export { AzureOpenAIProcessor } from "./model/text-completion/azure-openai.js";
|
||||
|
||||
// OpenAI-compatible text completion
|
||||
export { OpenAICompatibleProcessor } from "./model/text-completion/openai-compatible.js";
|
||||
|
||||
// Mistral text completion
|
||||
export { MistralProcessor } from "./model/text-completion/mistral.js";
|
||||
|
|
|
|||
156
ts/packages/flow/src/model/text-completion/azure-openai.ts
Normal file
156
ts/packages/flow/src/model/text-completion/azure-openai.ts
Normal file
|
|
@ -0,0 +1,156 @@
|
|||
/**
|
||||
* Azure OpenAI text completion service.
|
||||
*
|
||||
* Env:
|
||||
* AZURE_TOKEN (required – Azure OpenAI API key)
|
||||
* AZURE_ENDPOINT (required – e.g. https://my-resource.openai.azure.com)
|
||||
* AZURE_MODEL (default: gpt-4o)
|
||||
* AZURE_API_VERSION (default: 2024-12-01-preview)
|
||||
*/
|
||||
|
||||
import { AzureOpenAI } from "openai";
|
||||
import {
|
||||
LlmService,
|
||||
type ProcessorConfig,
|
||||
type LlmResult,
|
||||
type LlmChunk,
|
||||
TooManyRequestsError,
|
||||
} from "@trustgraph/base";
|
||||
|
||||
export class AzureOpenAIProcessor extends LlmService {
|
||||
private client: AzureOpenAI;
|
||||
private readonly defaultModel: string;
|
||||
private readonly defaultTemperature: number;
|
||||
private readonly maxOutput: number;
|
||||
|
||||
constructor(
|
||||
config: ProcessorConfig & {
|
||||
model?: string;
|
||||
apiKey?: string;
|
||||
endpoint?: string;
|
||||
apiVersion?: string;
|
||||
temperature?: number;
|
||||
maxOutput?: number;
|
||||
},
|
||||
) {
|
||||
super(config);
|
||||
|
||||
this.defaultModel = config.model ?? process.env.AZURE_MODEL ?? "gpt-4o";
|
||||
this.defaultTemperature = config.temperature ?? 0.0;
|
||||
this.maxOutput = config.maxOutput ?? 4096;
|
||||
|
||||
const apiKey = config.apiKey ?? process.env.AZURE_TOKEN;
|
||||
if (!apiKey) throw new Error("Azure OpenAI API key not specified");
|
||||
|
||||
const endpoint = config.endpoint ?? process.env.AZURE_ENDPOINT;
|
||||
if (!endpoint) throw new Error("Azure OpenAI endpoint not specified");
|
||||
|
||||
const apiVersion =
|
||||
config.apiVersion ??
|
||||
process.env.AZURE_API_VERSION ??
|
||||
"2024-12-01-preview";
|
||||
|
||||
this.client = new AzureOpenAI({ apiKey, apiVersion, endpoint });
|
||||
|
||||
console.log("[AzureOpenAI] LLM service initialized");
|
||||
}
|
||||
|
||||
async generateContent(
|
||||
system: string,
|
||||
prompt: string,
|
||||
model?: string,
|
||||
temperature?: number,
|
||||
): Promise<LlmResult> {
|
||||
const modelName = model ?? this.defaultModel;
|
||||
const temp = temperature ?? this.defaultTemperature;
|
||||
|
||||
try {
|
||||
const resp = await this.client.chat.completions.create({
|
||||
model: modelName,
|
||||
messages: [
|
||||
{ role: "system", content: system },
|
||||
{ role: "user", content: prompt },
|
||||
],
|
||||
temperature: temp,
|
||||
max_completion_tokens: this.maxOutput,
|
||||
});
|
||||
|
||||
return {
|
||||
text: resp.choices[0].message.content ?? "",
|
||||
inToken: resp.usage?.prompt_tokens ?? 0,
|
||||
outToken: resp.usage?.completion_tokens ?? 0,
|
||||
model: modelName,
|
||||
};
|
||||
} catch (err) {
|
||||
if ((err as any)?.status === 429) {
|
||||
throw new TooManyRequestsError();
|
||||
}
|
||||
throw err;
|
||||
}
|
||||
}
|
||||
|
||||
override supportsStreaming(): boolean {
|
||||
return true;
|
||||
}
|
||||
|
||||
async *generateContentStream(
|
||||
system: string,
|
||||
prompt: string,
|
||||
model?: string,
|
||||
temperature?: number,
|
||||
): AsyncGenerator<LlmChunk> {
|
||||
const modelName = model ?? this.defaultModel;
|
||||
const temp = temperature ?? this.defaultTemperature;
|
||||
|
||||
try {
|
||||
const stream = await this.client.chat.completions.create({
|
||||
model: modelName,
|
||||
messages: [
|
||||
{ role: "system", content: system },
|
||||
{ role: "user", content: prompt },
|
||||
],
|
||||
temperature: temp,
|
||||
max_completion_tokens: this.maxOutput,
|
||||
stream: true,
|
||||
stream_options: { include_usage: true },
|
||||
});
|
||||
|
||||
let totalInputTokens = 0;
|
||||
let totalOutputTokens = 0;
|
||||
|
||||
for await (const chunk of stream) {
|
||||
if (chunk.choices?.[0]?.delta?.content) {
|
||||
yield {
|
||||
text: chunk.choices[0].delta.content,
|
||||
inToken: null,
|
||||
outToken: null,
|
||||
model: modelName,
|
||||
isFinal: false,
|
||||
};
|
||||
}
|
||||
|
||||
if (chunk.usage) {
|
||||
totalInputTokens = chunk.usage.prompt_tokens;
|
||||
totalOutputTokens = chunk.usage.completion_tokens;
|
||||
}
|
||||
}
|
||||
|
||||
yield {
|
||||
text: "",
|
||||
inToken: totalInputTokens,
|
||||
outToken: totalOutputTokens,
|
||||
model: modelName,
|
||||
isFinal: true,
|
||||
};
|
||||
} catch (err) {
|
||||
if ((err as any)?.status === 429) {
|
||||
throw new TooManyRequestsError();
|
||||
}
|
||||
throw err;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
export async function run(): Promise<void> {
|
||||
await AzureOpenAIProcessor.launch("text-completion");
|
||||
}
|
||||
144
ts/packages/flow/src/model/text-completion/mistral.ts
Normal file
144
ts/packages/flow/src/model/text-completion/mistral.ts
Normal file
|
|
@ -0,0 +1,144 @@
|
|||
/**
|
||||
* Mistral text completion service.
|
||||
*
|
||||
* Env:
|
||||
* MISTRAL_TOKEN (required – Mistral API key)
|
||||
* MISTRAL_MODEL (default: ministral-8b-latest)
|
||||
*/
|
||||
|
||||
import { Mistral } from "@mistralai/mistralai";
|
||||
import {
|
||||
LlmService,
|
||||
type ProcessorConfig,
|
||||
type LlmResult,
|
||||
type LlmChunk,
|
||||
TooManyRequestsError,
|
||||
} from "@trustgraph/base";
|
||||
|
||||
export class MistralProcessor extends LlmService {
|
||||
private client: Mistral;
|
||||
private readonly defaultModel: string;
|
||||
private readonly defaultTemperature: number;
|
||||
private readonly maxOutput: number;
|
||||
|
||||
constructor(
|
||||
config: ProcessorConfig & {
|
||||
model?: string;
|
||||
apiKey?: string;
|
||||
temperature?: number;
|
||||
maxOutput?: number;
|
||||
},
|
||||
) {
|
||||
super(config);
|
||||
|
||||
this.defaultModel =
|
||||
config.model ?? process.env.MISTRAL_MODEL ?? "ministral-8b-latest";
|
||||
this.defaultTemperature = config.temperature ?? 0.0;
|
||||
this.maxOutput = config.maxOutput ?? 4096;
|
||||
|
||||
const apiKey = config.apiKey ?? process.env.MISTRAL_TOKEN;
|
||||
if (!apiKey) throw new Error("Mistral API key not specified");
|
||||
|
||||
this.client = new Mistral({ apiKey });
|
||||
|
||||
console.log("[Mistral] LLM service initialized");
|
||||
}
|
||||
|
||||
async generateContent(
|
||||
system: string,
|
||||
prompt: string,
|
||||
model?: string,
|
||||
temperature?: number,
|
||||
): Promise<LlmResult> {
|
||||
const modelName = model ?? this.defaultModel;
|
||||
const temp = temperature ?? this.defaultTemperature;
|
||||
|
||||
try {
|
||||
const resp = await this.client.chat.complete({
|
||||
model: modelName,
|
||||
messages: [
|
||||
{ role: "system", content: system },
|
||||
{ role: "user", content: prompt },
|
||||
],
|
||||
temperature: temp,
|
||||
maxTokens: this.maxOutput,
|
||||
});
|
||||
|
||||
return {
|
||||
text: (resp.choices?.[0]?.message?.content as string) ?? "",
|
||||
inToken: resp.usage?.promptTokens ?? 0,
|
||||
outToken: resp.usage?.completionTokens ?? 0,
|
||||
model: modelName,
|
||||
};
|
||||
} catch (err) {
|
||||
if ((err as any)?.statusCode === 429 || (err as any)?.status === 429) {
|
||||
throw new TooManyRequestsError();
|
||||
}
|
||||
throw err;
|
||||
}
|
||||
}
|
||||
|
||||
override supportsStreaming(): boolean {
|
||||
return true;
|
||||
}
|
||||
|
||||
async *generateContentStream(
|
||||
system: string,
|
||||
prompt: string,
|
||||
model?: string,
|
||||
temperature?: number,
|
||||
): AsyncGenerator<LlmChunk> {
|
||||
const modelName = model ?? this.defaultModel;
|
||||
const temp = temperature ?? this.defaultTemperature;
|
||||
|
||||
try {
|
||||
const stream = await this.client.chat.stream({
|
||||
model: modelName,
|
||||
messages: [
|
||||
{ role: "system", content: system },
|
||||
{ role: "user", content: prompt },
|
||||
],
|
||||
temperature: temp,
|
||||
maxTokens: this.maxOutput,
|
||||
});
|
||||
|
||||
let totalInputTokens = 0;
|
||||
let totalOutputTokens = 0;
|
||||
|
||||
for await (const chunk of stream) {
|
||||
const delta = chunk.data?.choices?.[0]?.delta;
|
||||
if (delta?.content) {
|
||||
yield {
|
||||
text: delta.content as string,
|
||||
inToken: null,
|
||||
outToken: null,
|
||||
model: modelName,
|
||||
isFinal: false,
|
||||
};
|
||||
}
|
||||
|
||||
if (chunk.data?.usage) {
|
||||
totalInputTokens = chunk.data.usage.promptTokens ?? 0;
|
||||
totalOutputTokens = chunk.data.usage.completionTokens ?? 0;
|
||||
}
|
||||
}
|
||||
|
||||
yield {
|
||||
text: "",
|
||||
inToken: totalInputTokens,
|
||||
outToken: totalOutputTokens,
|
||||
model: modelName,
|
||||
isFinal: true,
|
||||
};
|
||||
} catch (err) {
|
||||
if ((err as any)?.statusCode === 429 || (err as any)?.status === 429) {
|
||||
throw new TooManyRequestsError();
|
||||
}
|
||||
throw err;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
export async function run(): Promise<void> {
|
||||
await MistralProcessor.launch("text-completion");
|
||||
}
|
||||
139
ts/packages/flow/src/model/text-completion/openai-compatible.ts
Normal file
139
ts/packages/flow/src/model/text-completion/openai-compatible.ts
Normal file
|
|
@ -0,0 +1,139 @@
|
|||
/**
|
||||
* OpenAI-compatible text completion service (generic local server).
|
||||
*
|
||||
* Works with LM Studio, llama.cpp, vLLM, Ollama OpenAI-compat endpoint, etc.
|
||||
*
|
||||
* Env:
|
||||
* OPENAI_COMPAT_URL (required – e.g. http://localhost:1234/v1)
|
||||
* OPENAI_COMPAT_KEY (default: sk-no-key-required)
|
||||
* OPENAI_COMPAT_MODEL (default: default)
|
||||
*/
|
||||
|
||||
import OpenAI from "openai";
|
||||
import {
|
||||
LlmService,
|
||||
type ProcessorConfig,
|
||||
type LlmResult,
|
||||
type LlmChunk,
|
||||
} from "@trustgraph/base";
|
||||
|
||||
export class OpenAICompatibleProcessor extends LlmService {
|
||||
private client: OpenAI;
|
||||
private readonly defaultModel: string;
|
||||
private readonly defaultTemperature: number;
|
||||
private readonly maxOutput: number;
|
||||
|
||||
constructor(
|
||||
config: ProcessorConfig & {
|
||||
model?: string;
|
||||
apiKey?: string;
|
||||
baseUrl?: string;
|
||||
temperature?: number;
|
||||
maxOutput?: number;
|
||||
},
|
||||
) {
|
||||
super(config);
|
||||
|
||||
this.defaultModel =
|
||||
config.model ?? process.env.OPENAI_COMPAT_MODEL ?? "default";
|
||||
this.defaultTemperature = config.temperature ?? 0.0;
|
||||
this.maxOutput = config.maxOutput ?? 4096;
|
||||
|
||||
const baseURL = config.baseUrl ?? process.env.OPENAI_COMPAT_URL;
|
||||
if (!baseURL)
|
||||
throw new Error(
|
||||
"OpenAI-compatible server URL not specified (set OPENAI_COMPAT_URL)",
|
||||
);
|
||||
|
||||
const apiKey =
|
||||
config.apiKey ?? process.env.OPENAI_COMPAT_KEY ?? "sk-no-key-required";
|
||||
|
||||
this.client = new OpenAI({ baseURL, apiKey });
|
||||
|
||||
console.log("[OpenAI-Compatible] LLM service initialized");
|
||||
}
|
||||
|
||||
async generateContent(
|
||||
system: string,
|
||||
prompt: string,
|
||||
model?: string,
|
||||
temperature?: number,
|
||||
): Promise<LlmResult> {
|
||||
const modelName = model ?? this.defaultModel;
|
||||
const temp = temperature ?? this.defaultTemperature;
|
||||
|
||||
const resp = await this.client.chat.completions.create({
|
||||
model: modelName,
|
||||
messages: [
|
||||
{ role: "system", content: system },
|
||||
{ role: "user", content: prompt },
|
||||
],
|
||||
temperature: temp,
|
||||
max_tokens: this.maxOutput,
|
||||
});
|
||||
|
||||
return {
|
||||
text: resp.choices[0].message.content ?? "",
|
||||
inToken: resp.usage?.prompt_tokens ?? 0,
|
||||
outToken: resp.usage?.completion_tokens ?? 0,
|
||||
model: modelName,
|
||||
};
|
||||
}
|
||||
|
||||
override supportsStreaming(): boolean {
|
||||
return true;
|
||||
}
|
||||
|
||||
async *generateContentStream(
|
||||
system: string,
|
||||
prompt: string,
|
||||
model?: string,
|
||||
temperature?: number,
|
||||
): AsyncGenerator<LlmChunk> {
|
||||
const modelName = model ?? this.defaultModel;
|
||||
const temp = temperature ?? this.defaultTemperature;
|
||||
|
||||
const stream = await this.client.chat.completions.create({
|
||||
model: modelName,
|
||||
messages: [
|
||||
{ role: "system", content: system },
|
||||
{ role: "user", content: prompt },
|
||||
],
|
||||
temperature: temp,
|
||||
max_tokens: this.maxOutput,
|
||||
stream: true,
|
||||
});
|
||||
|
||||
let totalInputTokens = 0;
|
||||
let totalOutputTokens = 0;
|
||||
|
||||
for await (const chunk of stream) {
|
||||
if (chunk.choices?.[0]?.delta?.content) {
|
||||
yield {
|
||||
text: chunk.choices[0].delta.content,
|
||||
inToken: null,
|
||||
outToken: null,
|
||||
model: modelName,
|
||||
isFinal: false,
|
||||
};
|
||||
}
|
||||
|
||||
if (chunk.usage) {
|
||||
totalInputTokens = chunk.usage.prompt_tokens;
|
||||
totalOutputTokens = chunk.usage.completion_tokens;
|
||||
}
|
||||
}
|
||||
|
||||
yield {
|
||||
text: "",
|
||||
inToken: totalInputTokens,
|
||||
outToken: totalOutputTokens,
|
||||
model: modelName,
|
||||
isFinal: true,
|
||||
};
|
||||
}
|
||||
}
|
||||
|
||||
export async function run(): Promise<void> {
|
||||
await OpenAICompatibleProcessor.launch("text-completion");
|
||||
}
|
||||
Loading…
Add table
Add a link
Reference in a new issue