diff --git a/templates/components/embeddings-fastembed.jsonnet b/templates/components/embeddings-fastembed.jsonnet
new file mode 100644
index 00000000..a515617f
--- /dev/null
+++ b/templates/components/embeddings-fastembed.jsonnet
@@ -0,0 +1,43 @@
+local base = import "base/base.jsonnet";
+local images = import "values/images.jsonnet";
+local url = import "values/url.jsonnet";
+local prompts = import "prompts/mixtral.jsonnet";
+
+{
+
+    "embeddings-model":: "sentence-transformers/all-MiniLM-L6-v2",
+
+    embeddings +: {
+
+        create:: function(engine)
+
+            local container =
+                engine.container("embeddings")
+                    .with_image(images.trustgraph)
+                    .with_command([
+                        "embeddings-fastembed",
+                        "-p",
+                        url.pulsar,
+                        "-m",
+                        $["embeddings-model"],
+                    ])
+                    .with_limits("1.0", "400M")
+                    .with_reservations("0.5", "400M");
+
+            local containerSet = engine.containers(
+                "embeddings", [ container ]
+            );
+
+            local service =
+                engine.internalService(containerSet)
+                .with_port(8000, 8000, "metrics");
+
+            engine.resources([
+                containerSet,
+                service,
+            ])
+
+    },
+
+}
+
diff --git a/trustgraph-flow/scripts/embeddings-fastembed b/trustgraph-flow/scripts/embeddings-fastembed
new file mode 100755
index 00000000..e1322269
--- /dev/null
+++ b/trustgraph-flow/scripts/embeddings-fastembed
@@ -0,0 +1,6 @@
+#!/usr/bin/env python3
+
+from trustgraph.embeddings.fastembed import run
+
+run()
+
diff --git a/trustgraph-flow/setup.py b/trustgraph-flow/setup.py
index f5a0bc3c..a6191cf5 100644
--- a/trustgraph-flow/setup.py
+++ b/trustgraph-flow/setup.py
@@ -41,6 +41,7 @@ setuptools.setup(
         "cohere",
         "cryptography",
         "falkordb",
+        "fastembed",
         "google-generativeai",
         "ibis",
         "jsonschema",
@@ -78,6 +79,7 @@ setuptools.setup(
         "scripts/document-embeddings",
         "scripts/document-rag",
         "scripts/embeddings-ollama",
+        "scripts/embeddings-fastembed",
         "scripts/ge-query-milvus",
         "scripts/ge-query-pinecone",
         "scripts/ge-query-qdrant",
diff --git a/trustgraph-flow/trustgraph/embeddings/fastembed/__init__.py b/trustgraph-flow/trustgraph/embeddings/fastembed/__init__.py
new file mode 100644
index 00000000..9d16af90
--- /dev/null
+++ b/trustgraph-flow/trustgraph/embeddings/fastembed/__init__.py
@@ -0,0 +1,3 @@
+
+from . processor import *
+
diff --git a/trustgraph-flow/trustgraph/embeddings/fastembed/__main__.py b/trustgraph-flow/trustgraph/embeddings/fastembed/__main__.py
new file mode 100755
index 00000000..986c0257
--- /dev/null
+++ b/trustgraph-flow/trustgraph/embeddings/fastembed/__main__.py
@@ -0,0 +1,7 @@
+#!/usr/bin/env python3
+
+from . processor import run
+
+if __name__ == '__main__':
+    run()
+
diff --git a/trustgraph-flow/trustgraph/embeddings/fastembed/processor.py b/trustgraph-flow/trustgraph/embeddings/fastembed/processor.py
new file mode 100755
index 00000000..635387b8
--- /dev/null
+++ b/trustgraph-flow/trustgraph/embeddings/fastembed/processor.py
@@ -0,0 +1,89 @@
+
+"""
+Embeddings service, applies an embeddings model selected from HuggingFace.
+Input is text, output is embeddings vector.
+"""
+
+from ... schema import EmbeddingsRequest, EmbeddingsResponse
+from ... schema import embeddings_request_queue, embeddings_response_queue
+from ... log_level import LogLevel
+from ... base import ConsumerProducer
+from fastembed import TextEmbedding
+import os
+
+module = ".".join(__name__.split(".")[1:-1])
+
+default_input_queue = embeddings_request_queue
+default_output_queue = embeddings_response_queue
+default_subscriber = module
+default_model="sentence-transformers/all-MiniLM-L6-v2"
+
+class Processor(ConsumerProducer):
+
+    def __init__(self, **params):
+
+        input_queue = params.get("input_queue", default_input_queue)
+        output_queue = params.get("output_queue", default_output_queue)
+        subscriber = params.get("subscriber", default_subscriber)
+
+        model = params.get("model", default_model)
+
+        super(Processor, self).__init__(
+            **params | {
+                "input_queue": input_queue,
+                "output_queue": output_queue,
+                "subscriber": subscriber,
+                "input_schema": EmbeddingsRequest,
+                "output_schema": EmbeddingsResponse,
+                "model": model,
+            }
+        )
+
+        self.embeddings = TextEmbedding(model_name = model)
+
+    def handle(self, msg):
+
+        v = msg.value()
+
+        # Sender-produced ID
+
+        id = msg.properties()["id"]
+
+        print(f"Handling input {id}...", flush=True)
+
+        text = v.text
+        vecs = self.embeddings.embed([text])
+
+        vecs = [
+            v.tolist()
+            for v in vecs
+        ]
+
+        print("Send response...", flush=True)
+        r = EmbeddingsResponse(
+            vectors=list(vecs),
+            error=None,
+        )
+
+        self.producer.send(r, properties={"id": id})
+
+        print("Done.", flush=True)
+
+    @staticmethod
+    def add_args(parser):
+
+        ConsumerProducer.add_args(
+            parser, default_input_queue, default_subscriber,
+            default_output_queue,
+        )
+
+        parser.add_argument(
+            '-m', '--model',
+            default=default_model,
+            help=f'Embeddings model (default: {default_model})'
+        )
+
+def run():
+
+    Processor.start(module, __doc__)
+
diff --git a/trustgraph-flow/trustgraph/embeddings/ollama/processor.py b/trustgraph-flow/trustgraph/embeddings/ollama/processor.py
index fc54cbb8..5baf64aa 100755
--- a/trustgraph-flow/trustgraph/embeddings/ollama/processor.py
+++ b/trustgraph-flow/trustgraph/embeddings/ollama/processor.py
@@ -1,6 +1,6 @@
 
 """
-Embeddings service, applies an embeddings model selected from HuggingFace.
+Embeddings service, applies an embeddings model hosted on a local Ollama.
 Input is text, output is embeddings vector.
 """
 
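
Reviewer note: the embedding path the new processor wraps reduces to the fastembed
package's TextEmbedding API. A minimal standalone sketch, assuming fastembed's
published interface (embed() takes a list of texts and returns a generator of
numpy arrays, one vector per text; model weights are downloaded on first use):

    from fastembed import TextEmbedding

    # Same default model as the processor above; a hypothetical one-off use,
    # not part of this diff.
    embeddings = TextEmbedding(
        model_name="sentence-transformers/all-MiniLM-L6-v2"
    )

    # One input text in, one vector out; tolist() converts each numpy
    # array to a plain Python list of floats.
    vectors = [v.tolist() for v in embeddings.embed(["Hello, world!"])]

    print(len(vectors), len(vectors[0]))  # expect 1 vector, 384 dims for MiniLM-L6-v2

This mirrors handle() above: request text in, plain float lists out, which is
what EmbeddingsResponse carries back with the sender-produced "id" property.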