mirror of
https://github.com/trustgraph-ai/trustgraph.git
synced 2026-07-01 09:29:38 +02:00
Advance TS port Effect workbench
This commit is contained in:
parent
92dae8c374
commit
3515106670
116 changed files with 12286 additions and 9584 deletions
|
|
@ -21,81 +21,86 @@ import {
|
|||
type TextDocument,
|
||||
type Chunk,
|
||||
type Triples,
|
||||
type Spec,
|
||||
} from "@trustgraph/base";
|
||||
import { makeProcessorProgram } from "@trustgraph/base";
|
||||
import { makeFlowProcessorProgram } from "@trustgraph/base";
|
||||
import { Effect } from "effect";
|
||||
import { recursiveSplit } from "./recursive-splitter.js";
|
||||
|
||||
const DEFAULT_CHUNK_SIZE = 2000;
|
||||
const DEFAULT_CHUNK_OVERLAP = 100;
|
||||
|
||||
const onChunkMessage = Effect.fn("ChunkingService.onMessage")(function* (
|
||||
msg: TextDocument,
|
||||
properties: Record<string, string>,
|
||||
flowCtx: FlowContext,
|
||||
) {
|
||||
const requestId = properties.id;
|
||||
if (requestId === undefined || requestId.length === 0) return;
|
||||
|
||||
const chunkSize = yield* flowCtx.flow.parameterEffect<number>("chunk-size").pipe(
|
||||
Effect.orElseSucceed(() => DEFAULT_CHUNK_SIZE),
|
||||
);
|
||||
const chunkOverlap = yield* flowCtx.flow.parameterEffect<number>("chunk-overlap").pipe(
|
||||
Effect.orElseSucceed(() => DEFAULT_CHUNK_OVERLAP),
|
||||
);
|
||||
|
||||
const text = msg.text;
|
||||
if (text.trim().length === 0) {
|
||||
yield* Effect.logWarning(`[ChunkingService] Empty text received for document ${msg.documentId}`);
|
||||
return;
|
||||
}
|
||||
|
||||
const chunks = recursiveSplit(text, chunkSize, chunkOverlap);
|
||||
|
||||
yield* Effect.log(
|
||||
`[ChunkingService] Split document ${msg.documentId} into ${chunks.length} chunks (size=${chunkSize}, overlap=${chunkOverlap})`,
|
||||
);
|
||||
|
||||
const outputProducer = yield* flowCtx.flow.producerEffect<Chunk>("chunk-output");
|
||||
|
||||
yield* Effect.forEach(
|
||||
chunks,
|
||||
(chunkText) =>
|
||||
outputProducer.send(requestId, {
|
||||
metadata: msg.metadata,
|
||||
chunk: chunkText,
|
||||
documentId: msg.documentId,
|
||||
}),
|
||||
{ discard: true },
|
||||
);
|
||||
});
|
||||
|
||||
export const makeChunkingSpecs = (): ReadonlyArray<
|
||||
Spec<never>
|
||||
> => [
|
||||
new ConsumerSpec<TextDocument, FlowResourceNotFoundError | MessagingDeliveryError>(
|
||||
"chunk-input",
|
||||
onChunkMessage,
|
||||
),
|
||||
new ProducerSpec<Chunk>("chunk-output"),
|
||||
new ProducerSpec<Triples>("chunk-triples"),
|
||||
new ParameterSpec("chunk-size"),
|
||||
new ParameterSpec("chunk-overlap"),
|
||||
];
|
||||
|
||||
export class ChunkingService extends FlowProcessor {
|
||||
constructor(config: ProcessorConfig) {
|
||||
super(config);
|
||||
|
||||
this.registerSpecification(
|
||||
new ConsumerSpec<TextDocument, FlowResourceNotFoundError | MessagingDeliveryError>(
|
||||
"chunk-input",
|
||||
this.onMessageEffect.bind(this),
|
||||
),
|
||||
);
|
||||
this.registerSpecification(new ProducerSpec<Chunk>("chunk-output"));
|
||||
this.registerSpecification(new ProducerSpec<Triples>("chunk-triples"));
|
||||
this.registerSpecification(new ParameterSpec("chunk-size"));
|
||||
this.registerSpecification(new ParameterSpec("chunk-overlap"));
|
||||
for (const spec of makeChunkingSpecs()) {
|
||||
this.registerSpecification(spec);
|
||||
}
|
||||
|
||||
console.log("[ChunkingService] Service initialized");
|
||||
}
|
||||
|
||||
private onMessageEffect(
|
||||
msg: TextDocument,
|
||||
properties: Record<string, string>,
|
||||
flowCtx: FlowContext,
|
||||
) {
|
||||
return Effect.gen(function* () {
|
||||
const requestId = properties.id;
|
||||
if (requestId === undefined || requestId.length === 0) return;
|
||||
|
||||
const chunkSize = yield* flowCtx.flow.parameterEffect<number>("chunk-size").pipe(
|
||||
Effect.catch(() => Effect.succeed(DEFAULT_CHUNK_SIZE)),
|
||||
);
|
||||
const chunkOverlap = yield* flowCtx.flow.parameterEffect<number>("chunk-overlap").pipe(
|
||||
Effect.catch(() => Effect.succeed(DEFAULT_CHUNK_OVERLAP)),
|
||||
);
|
||||
|
||||
const text = msg.text;
|
||||
if (text.trim().length === 0) {
|
||||
yield* Effect.logWarning(`[ChunkingService] Empty text received for document ${msg.documentId}`);
|
||||
return;
|
||||
}
|
||||
|
||||
const chunks = recursiveSplit(text, chunkSize, chunkOverlap);
|
||||
|
||||
yield* Effect.log(
|
||||
`[ChunkingService] Split document ${msg.documentId} into ${chunks.length} chunks (size=${chunkSize}, overlap=${chunkOverlap})`,
|
||||
);
|
||||
|
||||
const outputProducer = yield* flowCtx.flow.producerEffect<Chunk>("chunk-output");
|
||||
|
||||
yield* Effect.forEach(
|
||||
chunks,
|
||||
(chunkText) =>
|
||||
outputProducer.send(requestId, {
|
||||
metadata: msg.metadata,
|
||||
chunk: chunkText,
|
||||
documentId: msg.documentId,
|
||||
}),
|
||||
{ discard: true },
|
||||
);
|
||||
});
|
||||
}
|
||||
}
|
||||
|
||||
export const program = makeProcessorProgram({
|
||||
export const program = makeFlowProcessorProgram({
|
||||
id: "chunking",
|
||||
make: (config) => new ChunkingService(config),
|
||||
specs: () => makeChunkingSpecs(),
|
||||
});
|
||||
|
||||
export async function run(): Promise<void> {
|
||||
await ChunkingService.launch("chunking");
|
||||
await Effect.runPromise(program);
|
||||
}
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue