This commit is contained in:
elpresidank 2026-05-12 08:06:58 -05:00
parent e8c7a4f6e0
commit ffd97375a8
160 changed files with 6704 additions and 1895 deletions

View file

@ -0,0 +1,230 @@
import { describe, expect, it } from "@effect/vitest";
import { ConfigProvider, Effect, Fiber } from "effect";
import {
MessagingRuntimeLive,
PubSub,
runProcessorScoped,
topics,
type BackendConsumer,
type BackendProducer,
type Chunk,
type CreateConsumerOptions,
type CreateProducerOptions,
type Message,
type PubSubBackend,
type TextDocument,
} from "@trustgraph/base";
import { ChunkingService } from "../chunking/service.js";
import { recursiveSplit } from "../chunking/recursive-splitter.js";
/**
 * Builds the minimal message object the in-memory test backend hands to consumers.
 *
 * @param value - Payload returned by the message's `value()` accessor.
 * @param properties - Record returned by the message's `properties()` accessor;
 *   defaults to a fresh empty record.
 * @returns An object with exactly two accessors, `value` and `properties`.
 */
function createMessage<T>(value: T, properties: Record<string, string> = {}): Message<T> {
  const readValue = (): T => value;
  const readProperties = (): Record<string, string> => properties;
  return { value: readValue, properties: readProperties };
}
/**
 * Polls `condition` every 5 ms until it returns `true`, failing once the
 * deadline has passed.
 *
 * @param condition - Synchronous predicate to poll. If it throws, on the first
 *   check or on any later poll, the effect fails with the thrown value.
 * @param label - Human-readable name used in the timeout error message.
 * @param timeoutMs - How long to keep polling before giving up; defaults to
 *   1000 ms.
 * @returns An effect that succeeds with `void` when the condition holds and
 *   fails with the raw rejection value otherwise.
 */
const waitFor = (condition: () => boolean, label: string, timeoutMs = 1000) =>
  Effect.tryPromise({
    try: (signal: AbortSignal) =>
      new Promise<void>((resolve, reject) => {
        const deadline = Date.now() + timeoutMs;
        let timer: ReturnType<typeof setTimeout> | undefined;
        const check = () => {
          try {
            if (condition()) {
              resolve();
              return;
            }
          } catch (error) {
            // Later polls run inside a timer callback; without this the throw
            // would be an uncaught exception instead of a rejection.
            reject(error);
            return;
          }
          if (Date.now() > deadline) {
            reject(new Error(`Timed out waiting for ${label}`));
            return;
          }
          timer = setTimeout(check, 5);
        };
        // Stop rescheduling once the surrounding effect is interrupted so no
        // poll timer outlives the test.
        signal.addEventListener("abort", () => clearTimeout(timer), { once: true });
        check();
      }),
    catch: (error) => error,
  });
/**
 * In-memory producer for tests: it stores every sent message and counts
 * `flush` and `close` calls instead of talking to a broker.
 */
class RecordingProducer<T> implements BackendProducer<T> {
  /** Sent messages in call order; the `properties` key exists only when properties were passed. */
  readonly sent: Array<{ readonly message: T; readonly properties?: Record<string, string> }> = [];
  /** Number of `close` calls so far. */
  closeCount = 0;
  /** Number of `flush` calls so far. */
  flushCount = 0;

  /** Records the message, omitting the `properties` key when none were supplied. */
  async send(message: T, properties?: Record<string, string>): Promise<void> {
    if (properties === undefined) {
      this.sent.push({ message });
    } else {
      this.sent.push({ message, properties });
    }
  }

  /** Counts the flush; nothing is buffered. */
  async flush(): Promise<void> {
    this.flushCount = this.flushCount + 1;
  }

  /** Counts the close; the recorded messages stay readable. */
  async close(): Promise<void> {
    this.closeCount = this.closeCount + 1;
  }
}
/**
 * In-memory consumer for tests. Messages are injected with `push`; `receive`
 * returns a queued message, or waits for the next `push` or `close`.
 */
class PushConsumer<T> implements BackendConsumer<T> {
  /** Messages passed to `acknowledge`, in call order. */
  readonly acknowledged: Array<Message<T>> = [];
  /** Messages passed to `negativeAcknowledge`, in call order. */
  readonly nacked: Array<Message<T>> = [];
  /** Number of `close` calls so far. */
  closeCount = 0;
  /** Messages pushed while no `receive` call was waiting. */
  private readonly messages: Array<Message<T>> = [];
  /** Resolvers of `receive` calls that found the queue empty. */
  private readonly waiters: Array<(message: Message<T> | null) => void> = [];
  private closed = false;

  /** Hands the message to the oldest waiting `receive`, or queues it when nobody is waiting. */
  push(message: Message<T>): void {
    const nextWaiter = this.waiters.shift();
    if (nextWaiter === undefined) {
      this.messages.push(message);
    } else {
      nextWaiter(message);
    }
  }

  /**
   * Returns the oldest queued message. With an empty queue it returns `null`
   * if the consumer is closed, and otherwise waits until `push` or `close`
   * resolves it.
   */
  async receive(): Promise<Message<T> | null> {
    const queued = this.messages.shift();
    if (queued !== undefined) {
      return queued;
    }
    if (this.closed) {
      return null;
    }
    const delivered = await new Promise<Message<T> | null>((resolve) => {
      this.waiters.push(resolve);
    });
    return delivered;
  }

  /** Records the message as acknowledged. */
  async acknowledge(message: Message<T>): Promise<void> {
    this.acknowledged.push(message);
  }

  /** Records the message as negatively acknowledged. */
  async negativeAcknowledge(message: Message<T>): Promise<void> {
    this.nacked.push(message);
  }

  /** No-op. */
  async unsubscribe(): Promise<void> {}

  /** Marks the consumer closed and resolves every waiting `receive` with `null`. */
  async close(): Promise<void> {
    this.closed = true;
    const pending = this.waiters.splice(0);
    pending.forEach((wake) => {
      wake(null);
    });
    this.closeCount += 1;
  }
}
/**
 * In-memory pub/sub backend for the chunking test. It hands out recording
 * producers and push-driven consumers, and keeps them indexed by topic so the
 * test can inspect and drive them.
 */
class ChunkingBackend implements PubSubBackend {
  /** Shared consumer returned for the `topics.configPush` topic. */
  readonly configConsumer = new PushConsumer<{ readonly version: number; readonly config: Record<string, unknown> }>();
  /** Most recently created consumer per topic; the config-push consumer is not stored here. */
  readonly consumersByTopic = new Map<string, PushConsumer<unknown>>();
  /** Most recently created producer per topic. */
  readonly producersByTopic = new Map<string, RecordingProducer<unknown>>();
  /** Options of every `createProducer` call, in call order. */
  readonly producerOptions: Array<CreateProducerOptions> = [];
  /** Options of every `createConsumer` call, in call order. */
  readonly consumerOptions: Array<CreateConsumerOptions> = [];
  /** Number of `close` calls so far. */
  closeCount = 0;

  /** Creates a recording producer and registers it under the requested topic. */
  async createProducer<T>(options: CreateProducerOptions): Promise<BackendProducer<T>> {
    this.producerOptions.push(options);
    const recorder = new RecordingProducer<unknown>();
    this.producersByTopic.set(options.topic, recorder);
    return recorder as BackendProducer<T>;
  }

  /**
   * Returns the shared config consumer for the config-push topic; any other
   * topic gets a fresh push consumer registered under that topic.
   */
  async createConsumer<T>(options: CreateConsumerOptions): Promise<BackendConsumer<T>> {
    this.consumerOptions.push(options);
    const { topic } = options;
    if (topic !== topics.configPush) {
      const created = new PushConsumer<unknown>();
      this.consumersByTopic.set(topic, created);
      return created as BackendConsumer<T>;
    }
    return this.configConsumer as unknown as BackendConsumer<T>;
  }

  /** Counts the close call. */
  async close(): Promise<void> {
    this.closeCount += 1;
  }

  /**
   * Pushes a version-1 config message describing a single `default` flow,
   * with its three chunk topics and the chunking parameters.
   */
  pushConfig(): void {
    const flowTopics = {
      "chunk-input": "chunk-input-topic",
      "chunk-output": "chunk-output-topic",
      "chunk-triples": "chunk-triples-topic",
    };
    const flowParameters = {
      "chunk-size": 18,
      "chunk-overlap": 0,
    };
    const config: Record<string, unknown> = {
      flows: {
        default: {
          topics: flowTopics,
          parameters: flowParameters,
        },
      },
    };
    this.configConsumer.push(createMessage({ version: 1, config }));
  }
}
// Layer that installs a ConfigProvider built from a fixed set of TG_* values
// (three set to "1", the request timeout to "250").
// Presumably these shorten the messaging runtime's receive/backoff/retry/request
// timings so the test finishes quickly — confirm against the runtime's config keys.
// NOTE(review): confirm that `ConfigProvider.fromEnv` in the pinned Effect version
// accepts the variable map directly rather than under an options key.
const fastMessagingConfig = ConfigProvider.layer(
ConfigProvider.fromEnv({
TG_CONSUMER_RECEIVE_TIMEOUT_MS: "1",
TG_CONSUMER_ERROR_BACKOFF_MS: "1",
TG_RATE_LIMIT_RETRY_MS: "1",
TG_REQUEST_TIMEOUT_MS: "250",
}),
);
// End-to-end test of ChunkingService against the in-memory backend: push a
// flow config, feed one text document, and check the emitted chunks and the
// settlement of the input message.
describe("ChunkingService", () => {
  it.effect(
    "handles chunk-input with native Effect flow resources",
    Effect.fnUntraced(function* () {
      const backend = new ChunkingBackend();
      yield* Effect.scoped(
        Effect.gen(function* () {
          // Run the processor in a child fiber so the test body can drive it.
          const fiber = yield* runProcessorScoped(
            {
              id: "chunking",
              pubsubUrl: "nats://unused:4222",
              metricsPort: 8000,
              manageProcessSignals: true,
            },
            (config) => new ChunkingService(config),
          ).pipe(
            Effect.provide(MessagingRuntimeLive),
            Effect.provide(PubSub.layer(backend)),
            Effect.provide(fastMessagingConfig),
            Effect.forkChild,
          );
          backend.pushConfig();
          // The flow's consumer and producer only exist once the pushed config was applied.
          yield* waitFor(() => backend.consumersByTopic.has("chunk-input-topic"), "chunk consumer");
          yield* waitFor(() => backend.producersByTopic.has("chunk-output-topic"), "chunk producer");
          const document: TextDocument = {
            documentId: "doc-1",
            metadata: {
              id: "pipeline-1",
              root: "root-1",
              user: "user-1",
              collection: "collection-1",
            },
            text: "alpha beta gamma delta epsilon zeta eta theta",
          };
          const inputConsumer = backend.consumersByTopic.get("chunk-input-topic") as PushConsumer<TextDocument>;
          inputConsumer.push(createMessage(document, { id: "request-1" }));
          const outputProducer = backend.producersByTopic.get("chunk-output-topic") as RecordingProducer<Chunk>;
          // Same size/overlap as the parameters in the pushed flow config.
          const expectedChunks = recursiveSplit(document.text, 18, 0);
          yield* waitFor(() => outputProducer.sent.length === expectedChunks.length, "chunk outputs");
          // Seeing the last output does not by itself establish that the input
          // message has been settled (presumably the ack happens after the
          // handler returns — confirm). Wait for ack or nack so the assertions
          // below do not race the consumer loop.
          yield* waitFor(
            () => inputConsumer.acknowledged.length > 0 || inputConsumer.nacked.length > 0,
            "input settlement",
          );
          expect(inputConsumer.acknowledged.length).toBe(1);
          expect(inputConsumer.nacked).toEqual([]);
          expect(outputProducer.sent.map(({ message }) => message.chunk)).toEqual(expectedChunks);
          expect(outputProducer.sent.every(({ properties }) => properties?.id === "request-1")).toBe(true);
          yield* Fiber.interrupt(fiber);
        }),
      );
      // Checked after the scope has closed: the backend must have been closed exactly once.
      expect(backend.closeCount).toBe(1);
    }),
  );
});

View file

@ -0,0 +1,82 @@
import { describe, expect, it } from "@effect/vitest";
import { Effect } from "effect";
import { makeOllamaEmbeddings } from "../embeddings/ollama.js";
// Tests for the Ollama embeddings provider using stubbed `fetch` implementations.
describe("Ollama embeddings provider", () => {
  it.effect(
    "posts embedding requests to Ollama",
    Effect.fnUntraced(function* () {
      // Every stubbed fetch invocation; `init` is stored only when it was passed.
      const recorded: Array<{ readonly input: RequestInfo | URL; readonly init?: RequestInit }> = [];
      const stubFetch = ((input: RequestInfo | URL, init?: RequestInit) => {
        if (init === undefined) {
          recorded.push({ input });
        } else {
          recorded.push({ input, init });
        }
        const payload = JSON.stringify({ embeddings: [[1, 2, 3]] });
        const response = new Response(payload, {
          status: 200,
          headers: { "Content-Type": "application/json" },
        });
        return Promise.resolve(response);
      }) as typeof fetch;
      const provider = makeOllamaEmbeddings({
        id: "embeddings",
        model: "default-model",
        ollamaHost: "http://ollama.local",
        fetch: stubFetch,
      });
      const vectors = yield* provider.embed(["alpha"], "override-model");
      expect(vectors).toEqual([[1, 2, 3]]);
      expect(recorded).toHaveLength(1);
      const request = recorded[0];
      expect(String(request?.input)).toBe("http://ollama.local/api/embed");
      expect(request?.init?.method).toBe("POST");
      // The model passed to `embed` must win over the configured default model.
      expect(JSON.parse(String(request?.init?.body))).toEqual({
        model: "override-model",
        input: ["alpha"],
      });
    }),
  );

  it.effect(
    "does not call Ollama for empty requests",
    Effect.fnUntraced(function* () {
      const requestedUrls: Array<RequestInfo | URL> = [];
      const stubFetch = ((input: RequestInfo | URL) => {
        requestedUrls.push(input);
        const response = new Response(JSON.stringify({ embeddings: [] }));
        return Promise.resolve(response);
      }) as typeof fetch;
      const provider = makeOllamaEmbeddings({
        id: "embeddings",
        fetch: stubFetch,
      });
      const vectors = yield* provider.embed([]);
      expect(vectors).toEqual([]);
      expect(requestedUrls).toEqual([]);
    }),
  );

  it.effect(
    "maps failed Ollama responses to EmbeddingsError",
    Effect.fnUntraced(function* () {
      const stubFetch = (() => {
        const notFound = new Response("not found", {
          status: 404,
        });
        return Promise.resolve(notFound);
      }) as typeof fetch;
      const provider = makeOllamaEmbeddings({
        id: "embeddings",
        ollamaHost: "http://ollama.local",
        fetch: stubFetch,
      });
      // Flip the effect so the expected failure becomes the success value.
      const failure = yield* provider.embed(["alpha"]).pipe(Effect.flip);
      expect(failure._tag).toBe("EmbeddingsError");
      expect(failure.operation).toBe("ollama.embed");
      expect(failure.provider).toBe("ollama");
      expect(failure.message).toContain("Ollama embeddings request failed (404): not found");
    }),
  );
});