mirror of
https://github.com/trustgraph-ai/trustgraph.git
synced 2026-07-01 17:39:39 +02:00
saving
This commit is contained in:
parent
e8c7a4f6e0
commit
ffd97375a8
160 changed files with 6704 additions and 1895 deletions
|
|
@ -28,6 +28,7 @@ import {
|
|||
type Triple,
|
||||
type Term,
|
||||
} from "@trustgraph/base";
|
||||
import { makeProcessorProgram } from "@trustgraph/base";
|
||||
|
||||
// Well-known RDF/SKOS IRIs
|
||||
const RDFS_LABEL = "http://www.w3.org/2000/01/rdf-schema#label";
|
||||
|
|
@ -49,7 +50,7 @@ export class KnowledgeExtractService extends FlowProcessor {
|
|||
super(config);
|
||||
|
||||
this.registerSpecification(
|
||||
new ConsumerSpec<Chunk>("extract-input", this.onMessage.bind(this)),
|
||||
ConsumerSpec.fromPromise<Chunk>("extract-input", this.onMessage.bind(this)),
|
||||
);
|
||||
this.registerSpecification(new ProducerSpec<Triples>("extract-triples"));
|
||||
this.registerSpecification(new ProducerSpec<EntityContexts>("extract-entity-contexts"));
|
||||
|
|
@ -78,10 +79,10 @@ export class KnowledgeExtractService extends FlowProcessor {
|
|||
flowCtx: FlowContext,
|
||||
): Promise<void> {
|
||||
const requestId = properties.id;
|
||||
if (!requestId) return;
|
||||
if (requestId === undefined || requestId.length === 0) return;
|
||||
|
||||
const text = msg.chunk;
|
||||
if (!text || text.trim().length === 0) return;
|
||||
if (text.trim().length === 0) return;
|
||||
|
||||
const promptClient = flowCtx.flow.requestor<PromptRequest, PromptResponse>("prompt-client");
|
||||
const llmClient = flowCtx.flow.requestor<TextCompletionRequest, TextCompletionResponse>("llm-client");
|
||||
|
|
@ -98,7 +99,7 @@ export class KnowledgeExtractService extends FlowProcessor {
|
|||
{ timeoutMs: 10_000 },
|
||||
);
|
||||
|
||||
if (!relPrompt.error) {
|
||||
if (relPrompt.error === undefined) {
|
||||
let relationships: ExtractedRelationship[] | null = null;
|
||||
for (let attempt = 0; attempt < 3; attempt++) {
|
||||
const relCompletion = await llmClient.request(
|
||||
|
|
@ -106,18 +107,27 @@ export class KnowledgeExtractService extends FlowProcessor {
|
|||
{ timeoutMs: 120_000 },
|
||||
);
|
||||
|
||||
if (!relCompletion.error && relCompletion.response) {
|
||||
if (
|
||||
relCompletion.error === undefined &&
|
||||
relCompletion.response.length > 0
|
||||
) {
|
||||
relationships = parseJsonResponse<ExtractedRelationship[]>(relCompletion.response);
|
||||
if (relationships) break;
|
||||
if (relationships !== null) break;
|
||||
console.warn(`[KnowledgeExtract] Relationship parse failed, attempt ${attempt + 1}/3`);
|
||||
} else {
|
||||
break; // LLM error, don't retry
|
||||
}
|
||||
}
|
||||
|
||||
if (relationships) {
|
||||
if (relationships !== null) {
|
||||
for (const rel of relationships) {
|
||||
if (!rel.subject || !rel.predicate || !rel.object) continue;
|
||||
if (
|
||||
rel.subject.length === 0 ||
|
||||
rel.predicate.length === 0 ||
|
||||
rel.object.length === 0
|
||||
) {
|
||||
continue;
|
||||
}
|
||||
|
||||
const subjectIri = toEntityIri(rel.subject);
|
||||
const predicateIri = toEntityIri(rel.predicate);
|
||||
|
|
@ -170,7 +180,7 @@ export class KnowledgeExtractService extends FlowProcessor {
|
|||
{ timeoutMs: 10_000 },
|
||||
);
|
||||
|
||||
if (!defPrompt.error) {
|
||||
if (defPrompt.error === undefined) {
|
||||
let definitions: ExtractedDefinition[] | null = null;
|
||||
for (let attempt = 0; attempt < 3; attempt++) {
|
||||
const defCompletion = await llmClient.request(
|
||||
|
|
@ -178,18 +188,21 @@ export class KnowledgeExtractService extends FlowProcessor {
|
|||
{ timeoutMs: 120_000 },
|
||||
);
|
||||
|
||||
if (!defCompletion.error && defCompletion.response) {
|
||||
if (
|
||||
defCompletion.error === undefined &&
|
||||
defCompletion.response.length > 0
|
||||
) {
|
||||
definitions = parseJsonResponse<ExtractedDefinition[]>(defCompletion.response);
|
||||
if (definitions) break;
|
||||
if (definitions !== null) break;
|
||||
console.warn(`[KnowledgeExtract] Definition parse failed, attempt ${attempt + 1}/3`);
|
||||
} else {
|
||||
break; // LLM error, don't retry
|
||||
}
|
||||
}
|
||||
|
||||
if (definitions) {
|
||||
if (definitions !== null) {
|
||||
for (const def of definitions) {
|
||||
if (!def.entity || !def.definition) continue;
|
||||
if (def.entity.length === 0 || def.definition.length === 0) continue;
|
||||
|
||||
const entityIri = toEntityIri(def.entity);
|
||||
|
||||
|
|
@ -265,8 +278,8 @@ export function parseJsonResponse<T>(raw: string): T | null {
|
|||
// Attempt 1: direct parse after stripping fences
|
||||
let cleaned = raw.trim();
|
||||
const fenceMatch = cleaned.match(/^```(?:json)?\s*\n?([\s\S]*?)\n?```$/);
|
||||
if (fenceMatch) {
|
||||
cleaned = fenceMatch[1].trim();
|
||||
if (fenceMatch !== null) {
|
||||
cleaned = (fenceMatch[1] ?? "").trim();
|
||||
}
|
||||
|
||||
try {
|
||||
|
|
@ -275,7 +288,7 @@ export function parseJsonResponse<T>(raw: string): T | null {
|
|||
|
||||
// Attempt 2: extract first JSON array from the text
|
||||
const arrayMatch = cleaned.match(/\[[\s\S]*\]/);
|
||||
if (arrayMatch) {
|
||||
if (arrayMatch !== null) {
|
||||
try {
|
||||
return JSON.parse(arrayMatch[0]) as T;
|
||||
} catch { /* fall through */ }
|
||||
|
|
@ -293,7 +306,7 @@ export function parseJsonResponse<T>(raw: string): T | null {
|
|||
|
||||
// Attempt 4: extract first JSON object, wrap in array
|
||||
const objMatch = cleaned.match(/\{[\s\S]*?\}/);
|
||||
if (objMatch) {
|
||||
if (objMatch !== null) {
|
||||
try {
|
||||
const obj = JSON.parse(objMatch[0]);
|
||||
return [obj] as unknown as T;
|
||||
|
|
@ -304,6 +317,11 @@ export function parseJsonResponse<T>(raw: string): T | null {
|
|||
return null;
|
||||
}
|
||||
|
||||
export const program = makeProcessorProgram({
|
||||
id: "knowledge-extract",
|
||||
make: (config) => new KnowledgeExtractService(config),
|
||||
});
|
||||
|
||||
export async function run(): Promise<void> {
|
||||
await KnowledgeExtractService.launch("knowledge-extract");
|
||||
}
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue