mirror of
https://github.com/trustgraph-ai/trustgraph.git
synced 2026-07-01 17:39:39 +02:00
saving
This commit is contained in:
parent
e8c7a4f6e0
commit
ffd97375a8
160 changed files with 6704 additions and 1895 deletions
230
ts/packages/flow/src/__tests__/chunking-service.test.ts
Normal file
230
ts/packages/flow/src/__tests__/chunking-service.test.ts
Normal file
|
|
@ -0,0 +1,230 @@
|
|||
import { describe, expect, it } from "@effect/vitest";
|
||||
import { ConfigProvider, Effect, Fiber } from "effect";
|
||||
import {
|
||||
MessagingRuntimeLive,
|
||||
PubSub,
|
||||
runProcessorScoped,
|
||||
topics,
|
||||
type BackendConsumer,
|
||||
type BackendProducer,
|
||||
type Chunk,
|
||||
type CreateConsumerOptions,
|
||||
type CreateProducerOptions,
|
||||
type Message,
|
||||
type PubSubBackend,
|
||||
type TextDocument,
|
||||
} from "@trustgraph/base";
|
||||
import { ChunkingService } from "../chunking/service.js";
|
||||
import { recursiveSplit } from "../chunking/recursive-splitter.js";
|
||||
|
||||
/**
 * Builds a minimal in-memory message for tests.
 *
 * @param value - Payload handed back by the message's `value()` accessor.
 * @param properties - String properties handed back by `properties()`;
 *   an empty record is used when omitted.
 * @returns A message whose accessors return exactly the arguments given.
 */
function createMessage<T>(value: T, properties: Record<string, string> = {}): Message<T> {
  const readValue = (): T => value;
  const readProperties = (): Record<string, string> => properties;
  return { value: readValue, properties: readProperties };
}
|
||||
|
||||
/**
 * Polls `condition` on a timer until it returns `true`, as an Effect.
 *
 * @param condition - Synchronous predicate checked immediately and then once
 *   per poll interval.
 * @param label - Inserted into the timeout error message.
 * @param timeoutMs - How long to keep polling before giving up (default 1000).
 * @param pollIntervalMs - Delay between checks (default 5).
 * @returns An Effect that succeeds once `condition()` is true. It fails with
 *   a `Timed out waiting for <label>` error when the deadline passes, or with
 *   whatever `condition` threw.
 */
const waitFor = (
  condition: () => boolean,
  label: string,
  timeoutMs = 1000,
  pollIntervalMs = 5,
) =>
  Effect.tryPromise({
    try: () =>
      new Promise<void>((resolve, reject) => {
        const deadline = Date.now() + timeoutMs;
        const check = (): void => {
          let satisfied: boolean;
          try {
            satisfied = condition();
          } catch (error) {
            // A throw from a timer callback would otherwise surface as an
            // uncaught exception and leave this promise pending forever.
            reject(error);
            return;
          }
          if (satisfied) {
            resolve();
            return;
          }
          if (Date.now() > deadline) {
            reject(new Error(`Timed out waiting for ${label}`));
            return;
          }
          setTimeout(check, pollIntervalMs);
        };
        check();
      }),
    catch: (error) => error,
  });
|
||||
|
||||
/**
 * Producer test double that records every sent message and counts
 * `flush()` / `close()` calls instead of talking to a broker.
 */
class RecordingProducer<T> implements BackendProducer<T> {
  /** Every message passed to `send`, in call order. */
  readonly sent: Array<{ readonly message: T; readonly properties?: Record<string, string> }> = [];
  /** Number of times `close()` has been called. */
  closeCount = 0;
  /** Number of times `flush()` has been called. */
  flushCount = 0;

  /** Records the message; the `properties` key is left off when none were given. */
  async send(message: T, properties?: Record<string, string>): Promise<void> {
    if (properties === undefined) {
      this.sent.push({ message });
    } else {
      this.sent.push({ message, properties });
    }
  }

  /** Counts the flush; nothing is buffered. */
  async flush(): Promise<void> {
    this.flushCount = this.flushCount + 1;
  }

  /** Counts the close; no resources are held. */
  async close(): Promise<void> {
    this.closeCount = this.closeCount + 1;
  }
}
|
||||
|
||||
/**
 * Consumer test double fed by the test through `push()`.
 *
 * `receive()` hands out queued messages in FIFO order and otherwise parks
 * until a message is pushed or the consumer is closed. Acknowledgements and
 * negative acknowledgements are recorded for later assertions.
 */
class PushConsumer<T> implements BackendConsumer<T> {
  /** Messages passed to `acknowledge`, in call order. */
  readonly acknowledged: Array<Message<T>> = [];
  /** Messages passed to `negativeAcknowledge`, in call order. */
  readonly nacked: Array<Message<T>> = [];
  /** Number of times `close()` has been called. */
  closeCount = 0;
  /** Messages pushed while no `receive()` call was waiting. */
  private readonly messages: Array<Message<T>> = [];
  /** Resolvers of `receive()` calls that are waiting for a message. */
  private readonly waiters: Array<(message: Message<T> | null) => void> = [];
  private closed = false;

  /** Delivers to the oldest waiting `receive()` call, or queues the message. */
  push(message: Message<T>): void {
    const pendingReceive = this.waiters.shift();
    if (pendingReceive === undefined) {
      this.messages.push(message);
    } else {
      pendingReceive(message);
    }
  }

  /**
   * Resolves with the next queued message, with `null` once the consumer is
   * closed and the queue is empty, or waits for the next `push()` / `close()`.
   */
  async receive(): Promise<Message<T> | null> {
    const queued = this.messages.shift();
    if (queued !== undefined) {
      return queued;
    }
    if (this.closed) {
      return null;
    }
    return await new Promise<Message<T> | null>((resolve) => {
      this.waiters.push(resolve);
    });
  }

  async acknowledge(message: Message<T>): Promise<void> {
    this.acknowledged.push(message);
  }

  async negativeAcknowledge(message: Message<T>): Promise<void> {
    this.nacked.push(message);
  }

  /** No-op: there is no subscription to tear down. */
  async unsubscribe(): Promise<void> {}

  /** Marks the consumer closed and releases every waiting `receive()` with `null`. */
  async close(): Promise<void> {
    this.closed = true;
    const parked = this.waiters.splice(0);
    parked.forEach((wake) => {
      wake(null);
    });
    this.closeCount += 1;
  }
}
|
||||
|
||||
/**
 * In-memory pub/sub backend for the chunking test.
 *
 * It records the options of every producer and consumer it is asked to
 * create, indexes the created test doubles by topic, and routes the
 * config-push topic to one shared consumer that the test feeds through
 * `pushConfig()`.
 */
class ChunkingBackend implements PubSubBackend {
  /** Consumer returned for `topics.configPush`; fed by `pushConfig()`. */
  readonly configConsumer = new PushConsumer<{ readonly version: number; readonly config: Record<string, unknown> }>();
  /** Most recently created consumer per topic, excluding the config-push topic. */
  readonly consumersByTopic = new Map<string, PushConsumer<unknown>>();
  /** Most recently created producer per topic. */
  readonly producersByTopic = new Map<string, RecordingProducer<unknown>>();
  /** Options of every `createProducer` call, in call order. */
  readonly producerOptions: Array<CreateProducerOptions> = [];
  /** Options of every `createConsumer` call, in call order. */
  readonly consumerOptions: Array<CreateConsumerOptions> = [];
  /** Number of times `close()` has been called. */
  closeCount = 0;

  /** Creates a recording producer and registers it under the requested topic. */
  async createProducer<T>(options: CreateProducerOptions): Promise<BackendProducer<T>> {
    this.producerOptions.push(options);
    const recorder = new RecordingProducer<unknown>();
    this.producersByTopic.set(options.topic, recorder);
    return recorder as BackendProducer<T>;
  }

  /**
   * Returns the shared config consumer for the config-push topic; any other
   * topic gets a fresh push consumer registered under that topic.
   */
  async createConsumer<T>(options: CreateConsumerOptions): Promise<BackendConsumer<T>> {
    this.consumerOptions.push(options);
    if (options.topic !== topics.configPush) {
      const created = new PushConsumer<unknown>();
      this.consumersByTopic.set(options.topic, created);
      return created as BackendConsumer<T>;
    }
    return this.configConsumer as unknown as BackendConsumer<T>;
  }

  /** Counts the close; no resources are held. */
  async close(): Promise<void> {
    this.closeCount += 1;
  }

  /**
   * Pushes a version-1 config message describing a single `default` flow
   * with its three chunk topics and chunking parameters.
   */
  pushConfig(): void {
    const defaultFlow = {
      topics: {
        "chunk-input": "chunk-input-topic",
        "chunk-output": "chunk-output-topic",
        "chunk-triples": "chunk-triples-topic",
      },
      parameters: {
        "chunk-size": 18,
        "chunk-overlap": 0,
      },
    };
    const configMessage = createMessage({
      version: 1,
      config: {
        flows: {
          default: defaultFlow,
        },
      },
    });
    this.configConsumer.push(configMessage);
  }
}
|
||||
|
||||
/**
 * Config layer supplying small values for the messaging runtime's timing
 * settings (presumably milliseconds, going by the `_MS` key suffix).
 *
 * NOTE(review): confirm that this Effect version's `ConfigProvider.fromEnv`
 * accepts the variable record directly rather than under an `env` option.
 */
const fastMessagingConfig = (() => {
  const provider = ConfigProvider.fromEnv({
    TG_CONSUMER_RECEIVE_TIMEOUT_MS: "1",
    TG_CONSUMER_ERROR_BACKOFF_MS: "1",
    TG_RATE_LIMIT_RETRY_MS: "1",
    TG_REQUEST_TIMEOUT_MS: "250",
  });
  return ConfigProvider.layer(provider);
})();
|
||||
|
||||
// End-to-end test of ChunkingService against the in-memory backend: publish
// a flow config, feed one document into the input topic, and check the
// chunks that come out of the output topic.
describe("ChunkingService", () => {
  it.effect(
    "handles chunk-input with native Effect flow resources",
    Effect.fnUntraced(function* () {
      const backend = new ChunkingBackend();

      yield* Effect.scoped(
        Effect.gen(function* () {
          const fiber = yield* runProcessorScoped(
            {
              id: "chunking",
              pubsubUrl: "nats://unused:4222",
              metricsPort: 8000,
              manageProcessSignals: true,
            },
            (config) => new ChunkingService(config),
          ).pipe(
            Effect.provide(MessagingRuntimeLive),
            Effect.provide(PubSub.layer(backend)),
            Effect.provide(fastMessagingConfig),
            Effect.forkChild,
          );

          // The flow's consumer and producer are expected to appear only
          // after the config message has been delivered.
          backend.pushConfig();
          yield* waitFor(() => backend.consumersByTopic.has("chunk-input-topic"), "chunk consumer");
          yield* waitFor(() => backend.producersByTopic.has("chunk-output-topic"), "chunk producer");

          const document: TextDocument = {
            documentId: "doc-1",
            metadata: {
              id: "pipeline-1",
              root: "root-1",
              user: "user-1",
              collection: "collection-1",
            },
            text: "alpha beta gamma delta epsilon zeta eta theta",
          };
          const inputConsumer = backend.consumersByTopic.get("chunk-input-topic") as PushConsumer<TextDocument>;
          inputConsumer.push(createMessage(document, { id: "request-1" }));

          const outputProducer = backend.producersByTopic.get("chunk-output-topic") as RecordingProducer<Chunk>;
          // Same size/overlap as the pushed flow parameters.
          const expectedChunks = recursiveSplit(document.text, 18, 0);
          yield* waitFor(() => outputProducer.sent.length === expectedChunks.length, "chunk outputs");
          // The acknowledgement may land after the last chunk is sent, so
          // wait for it instead of asserting on it immediately.
          yield* waitFor(() => inputConsumer.acknowledged.length === 1, "input acknowledgement");

          expect(inputConsumer.acknowledged.length).toBe(1);
          expect(inputConsumer.nacked).toEqual([]);
          expect(outputProducer.sent.map(({ message }) => message.chunk)).toEqual(expectedChunks);
          // Comparing the id list (rather than a boolean) shows the
          // offending value when a chunk carries the wrong request id.
          expect(outputProducer.sent.map(({ properties }) => properties?.id)).toEqual(
            expectedChunks.map(() => "request-1"),
          );

          yield* Fiber.interrupt(fiber);
        }),
      );

      // Leaving the scope must have closed the backend exactly once.
      expect(backend.closeCount).toBe(1);
    }),
  );
});
|
||||
82
ts/packages/flow/src/__tests__/ollama-embeddings.test.ts
Normal file
82
ts/packages/flow/src/__tests__/ollama-embeddings.test.ts
Normal file
|
|
@ -0,0 +1,82 @@
|
|||
import { describe, expect, it } from "@effect/vitest";
|
||||
import { Effect } from "effect";
|
||||
import { makeOllamaEmbeddings } from "../embeddings/ollama.js";
|
||||
|
||||
// Tests for the Ollama embeddings provider, using a stubbed `fetch` so no
// network request is made.
describe("Ollama embeddings provider", () => {
  it.effect(
    "posts embedding requests to Ollama",
    Effect.fnUntraced(function* () {
      const requests: Array<{ readonly input: RequestInfo | URL; readonly init?: RequestInit }> = [];
      // Records each call and answers with one fixed embedding vector.
      const recordingFetch = ((input: RequestInfo | URL, init?: RequestInit) => {
        if (init === undefined) {
          requests.push({ input });
        } else {
          requests.push({ input, init });
        }
        const responseBody = JSON.stringify({ embeddings: [[1, 2, 3]] });
        const response = new Response(responseBody, {
          status: 200,
          headers: { "Content-Type": "application/json" },
        });
        return Promise.resolve(response);
      }) as typeof fetch;
      const provider = makeOllamaEmbeddings({
        id: "embeddings",
        model: "default-model",
        ollamaHost: "http://ollama.local",
        fetch: recordingFetch,
      });

      // The per-call model argument should win over the configured default.
      const result = yield* provider.embed(["alpha"], "override-model");

      expect(result).toEqual([[1, 2, 3]]);
      expect(requests).toHaveLength(1);
      const request = requests[0];
      expect(String(request?.input)).toBe("http://ollama.local/api/embed");
      expect(request?.init?.method).toBe("POST");
      const sentBody: unknown = JSON.parse(String(request?.init?.body));
      expect(sentBody).toEqual({
        model: "override-model",
        input: ["alpha"],
      });
    }),
  );

  it.effect(
    "does not call Ollama for empty requests",
    Effect.fnUntraced(function* () {
      const requestedUrls: Array<RequestInfo | URL> = [];
      const recordingFetch = ((input: RequestInfo | URL) => {
        requestedUrls.push(input);
        const emptyBody = JSON.stringify({ embeddings: [] });
        return Promise.resolve(new Response(emptyBody));
      }) as typeof fetch;
      const provider = makeOllamaEmbeddings({
        id: "embeddings",
        fetch: recordingFetch,
      });

      const result = yield* provider.embed([]);

      expect(result).toEqual([]);
      expect(requestedUrls).toEqual([]);
    }),
  );

  it.effect(
    "maps failed Ollama responses to EmbeddingsError",
    Effect.fnUntraced(function* () {
      // Always answers 404 with a plain-text body.
      const failingFetch = (() => {
        const notFound = new Response("not found", { status: 404 });
        return Promise.resolve(notFound);
      }) as typeof fetch;
      const provider = makeOllamaEmbeddings({
        id: "embeddings",
        ollamaHost: "http://ollama.local",
        fetch: failingFetch,
      });

      // Flip the effect so the expected failure becomes the success value.
      const failure = yield* Effect.flip(provider.embed(["alpha"]));

      expect(failure._tag).toBe("EmbeddingsError");
      expect(failure.operation).toBe("ollama.embed");
      expect(failure.provider).toBe("ollama");
      expect(failure.message).toContain("Ollama embeddings request failed (404): not found");
    }),
  );
});
|
||||
Loading…
Add table
Add a link
Reference in a new issue