Feature/fastembed (#286)

* Copied in base files for fastembed

* Added fastembed implementation

* Added template support for fastembed
cybermaggedon 2025-01-28 18:26:17 +00:00 committed by GitHub
parent 75a72b0d2d
commit 6c3d2e7f97
7 changed files with 151 additions and 1 deletion

View file

@@ -0,0 +1,43 @@
local base = import "base/base.jsonnet";
local images = import "values/images.jsonnet";
local url = import "values/url.jsonnet";
local prompts = import "prompts/mixtral.jsonnet";

{

    "embeddings-model":: "sentence-transformers/all-MiniLM-L6-v2",

    embeddings +: {

        create:: function(engine)

            local container =
                engine.container("embeddings")
                    .with_image(images.trustgraph)
                    .with_command([
                        "embeddings-fastembed",
                        "-p",
                        url.pulsar,
                        "-m",
                        $["embeddings-model"],
                    ])
                    .with_limits("1.0", "400M")
                    .with_reservations("0.5", "400M");

            local containerSet = engine.containers(
                "embeddings", [ container ]
            );

            local service =
                engine.internalService(containerSet)
                    .with_port(8000, 8000, "metrics");

            engine.resources([
                containerSet,
                service,
            ])

    },

}

View file

@@ -0,0 +1,6 @@
#!/usr/bin/env python3

from trustgraph.embeddings.fastembed import run

run()

View file

@@ -41,6 +41,7 @@ setuptools.setup(
         "cohere",
         "cryptography",
         "falkordb",
+        "fastembed",
         "google-generativeai",
         "ibis",
         "jsonschema",
@@ -78,6 +79,7 @@ setuptools.setup(
         "scripts/document-embeddings",
         "scripts/document-rag",
         "scripts/embeddings-ollama",
+        "scripts/embeddings-fastembed",
         "scripts/ge-query-milvus",
         "scripts/ge-query-pinecone",
         "scripts/ge-query-qdrant",

View file

@@ -0,0 +1,3 @@
from . processor import *

View file

@@ -0,0 +1,7 @@
#!/usr/bin/env python3

from . processor import run

if __name__ == '__main__':
    run()

View file

@@ -0,0 +1,89 @@
"""
Embeddings service, applies an embeddings model selected from HuggingFace.
Input is text, output is embeddings vector.
"""
from ... schema import EmbeddingsRequest, EmbeddingsResponse
from ... schema import embeddings_request_queue, embeddings_response_queue
from ... log_level import LogLevel
from ... base import ConsumerProducer
from fastembed import TextEmbedding
import os
module = ".".join(__name__.split(".")[1:-1])
default_input_queue = embeddings_request_queue
default_output_queue = embeddings_response_queue
default_subscriber = module
default_model="sentence-transformers/all-MiniLM-L6-v2"
class Processor(ConsumerProducer):
def __init__(self, **params):
input_queue = params.get("input_queue", default_input_queue)
output_queue = params.get("output_queue", default_output_queue)
subscriber = params.get("subscriber", default_subscriber)
model = params.get("model", default_model)
super(Processor, self).__init__(
**params | {
"input_queue": input_queue,
"output_queue": output_queue,
"subscriber": subscriber,
"input_schema": EmbeddingsRequest,
"output_schema": EmbeddingsResponse,
"model": model,
}
)
self.embeddings = TextEmbedding(model_name = model)
def handle(self, msg):
v = msg.value()
# Sender-produced ID
id = msg.properties()["id"]
print(f"Handling input {id}...", flush=True)
text = v.text
vecs = self.embeddings.embed([text])
vecs = [
v.tolist()
for v in vecs
]
print("Send response...", flush=True)
r = EmbeddingsResponse(
vectors=list(vecs),
error=None,
)
self.producer.send(r, properties={"id": id})
print("Done.", flush=True)
@staticmethod
def add_args(parser):
ConsumerProducer.add_args(
parser, default_input_queue, default_subscriber,
default_output_queue,
)
parser.add_argument(
'-m', '--model',
default=default_model,
help=f'Embeddings model (default: {default_model})'
)
def run():
Processor.start(module, __doc__)
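
For reference, a minimal standalone sketch of the embedding step this processor wraps, independent of the Pulsar plumbing. It assumes only the fastembed package and the default model used above; the example text is hypothetical and the dimension noted in the comment is specific to that model.

# Standalone sketch, not part of this commit: the fastembed call behind handle().
from fastembed import TextEmbedding

model = TextEmbedding(model_name="sentence-transformers/all-MiniLM-L6-v2")

# embed() yields one numpy array per input text; .tolist() mirrors the
# conversion done before building the EmbeddingsResponse above.
vectors = [v.tolist() for v in model.embed(["What is TrustGraph?"])]

print(len(vectors), len(vectors[0]))  # 1 vector, 384 dimensions for this model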

View file

@@ -1,6 +1,6 @@
 """
-Embeddings service, applies an embeddings model selected from HuggingFace.
+Embeddings service, applies an embeddings model hosted on a local Ollama.
 Input is text, output is embeddings vector.
 """