Feature/subpackages (#80)

* Renaming what will become the core package

* Tweaking to get package build working

* Fix metering merge

* Rename to core directory

* Bump version.  Use namespace searching for packaging trustgraph-core

* Change references to trustgraph-core

* Forming embeddings-hf package

* Reference modules in core package.

* Build both packages to one container, bump version

* Update YAMLs
This commit is contained in:
cybermaggedon 2024-09-30 14:00:29 +01:00 committed by GitHub
parent 14d79ef9f1
commit f081933217
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
303 changed files with 681 additions and 624 deletions

View file

@ -0,0 +1,6 @@
#!/usr/bin/env python3

"""Launcher for the HuggingFace embeddings service (installed via setup.py
``scripts=``).  Delegates to the package's ``run()`` entry point."""

from trustgraph.embeddings_hf.embeddings.hf import run

# Guard the invocation so importing this file has no side effects,
# consistent with the package-internal entry module.
if __name__ == '__main__':
    run()

View file

@ -0,0 +1,47 @@
import setuptools
import os

# Read the long description from the README shipped next to this setup.py.
# Encoding is pinned so the build does not depend on the machine locale.
with open("README.md", "r", encoding="utf-8") as fh:
    long_description = fh.read()

version = "0.11.6"

setuptools.setup(
    name="trustgraph-embeddings-hf",
    version=version,
    author="trustgraph.ai",
    author_email="security@trustgraph.ai",
    description="HuggingFace embeddings support for TrustGraph.",
    long_description=long_description,
    long_description_content_type="text/markdown",
    url="https://github.com/trustgraph-ai/trustgraph",
    # Namespace-package discovery: this distribution contributes the
    # trustgraph.embeddings_hf subpackage to the shared "trustgraph"
    # namespace, alongside trustgraph-core.
    packages=setuptools.find_namespace_packages(
        where='./',
        # include=['trustgraph.embeddings_hf']
    ),
    classifiers=[
        "Programming Language :: Python :: 3",
        "License :: OSI Approved :: GNU General Public License v3 or later (GPLv3+)",
        "Operating System :: OS Independent",
    ],
    python_requires='>=3.8',
    download_url="https://github.com/trustgraph-ai/trustgraph/archive/refs/tags/v" + version + ".tar.gz",
    install_requires=[
        "trustgraph-core",
        "torch",
        "urllib3",
        "transformers",
        "sentence-transformers",
        "langchain",
        "langchain-core",
        "langchain-huggingface",
        "langchain-community",
        "huggingface-hub",
        "pulsar-client",
        "pyyaml",
        "prometheus-client",
    ],
    scripts=[
        "scripts/embeddings-hf",
    ]
)

View file

@ -0,0 +1,3 @@
from . hf import *

View file

@ -0,0 +1,7 @@
#!/usr/bin/env python3

# Package-internal entry point for the embeddings service.
# NOTE(review): the relative import means this file cannot be executed
# directly via the shebang (relative imports fail outside a package);
# presumably it is invoked with `python -m` — confirm intended invocation.
from . hf import run

if __name__ == '__main__':
    run()

View file

@ -0,0 +1,100 @@
"""
Embeddings service, applies an embeddings model selected from HuggingFace.
Input is text, output is embeddings vector.
"""
from langchain_huggingface import HuggingFaceEmbeddings
from trustgraph.core.schema import EmbeddingsRequest, EmbeddingsResponse, Error
from trustgraph.core.schema import embeddings_request_queue
from trustgraph.core.schema import embeddings_response_queue
from trustgraph.core.log_level import LogLevel
from trustgraph.core.base import ConsumerProducer
# Derive this service's identifier from the dotted module path, dropping the
# top-level package name and the final module segment, e.g.
# "trustgraph.embeddings_hf.embeddings.hf" -> "embeddings_hf.embeddings".
module = ".".join(__name__.split(".")[1:-1])

# Default queue names come from the shared schema in trustgraph-core; the
# subscriber name defaults to the derived module identifier.
default_input_queue = embeddings_request_queue
default_output_queue = embeddings_response_queue
default_subscriber = module

# Default sentence-transformers model fetched from HuggingFace.
default_model="all-MiniLM-L6-v2"
class Processor(ConsumerProducer):
    """Queue consumer/producer that answers EmbeddingsRequest messages with
    vectors computed by a HuggingFace sentence-transformers model."""

    def __init__(self, **params):
        """Configure queues/subscriber, then load the embeddings model.

        Recognised params (all optional, falling back to module defaults):
            input_queue: queue to consume requests from
            output_queue: queue to publish responses to
            subscriber: subscription name
            model: HuggingFace model name to load
        """
        input_queue = params.get("input_queue", default_input_queue)
        output_queue = params.get("output_queue", default_output_queue)
        subscriber = params.get("subscriber", default_subscriber)
        model = params.get("model", default_model)

        super(Processor, self).__init__(
            **params | {
                "input_queue": input_queue,
                "output_queue": output_queue,
                "subscriber": subscriber,
                "input_schema": EmbeddingsRequest,
                "output_schema": EmbeddingsResponse,
            }
        )

        # Loads (and downloads, if not cached) the model; may take a while.
        self.embeddings = HuggingFaceEmbeddings(model_name=model)

    def handle(self, msg):
        """Process one request: embed the text and send the response.

        On failure an error response is sent instead.  The message is
        acknowledged in both cases, so it is not redelivered.
        """
        v = msg.value()

        # Sender-produced ID, echoed back on the response so the caller
        # can correlate it with the request.  (Renamed from `id`, which
        # shadowed the builtin.)
        request_id = msg.properties()["id"]

        print(f"Handling input {request_id}...", flush=True)

        try:
            text = v.text
            embeds = self.embeddings.embed_documents([text])

            print("Send response...", flush=True)
            r = EmbeddingsResponse(vectors=embeds, error=None)
            self.producer.send(r, properties={"id": request_id})
            print("Done.", flush=True)

        except Exception as e:
            print(f"Exception: {e}", flush=True)
            print("Send error response...", flush=True)

            # Bug fix: the error path previously passed `response=None`,
            # but the success path shows the schema field is `vectors`
            # (EmbeddingsResponse(vectors=..., error=...)).  Use the
            # matching field so the error response constructs correctly.
            r = EmbeddingsResponse(
                error=Error(
                    type="llm-error",
                    message=str(e),
                ),
                vectors=None,
            )
            self.producer.send(r, properties={"id": request_id})

        self.consumer.acknowledge(msg)

    @staticmethod
    def add_args(parser):
        """Register CLI arguments on top of the base ConsumerProducer ones."""
        ConsumerProducer.add_args(
            parser, default_input_queue, default_subscriber,
            default_output_queue,
        )

        # Reference the shared default_model constant so the CLI default and
        # the constructor default cannot drift apart (was hard-coded twice).
        parser.add_argument(
            '-m', '--model',
            default=default_model,
            help=f'Embeddings model (default: {default_model})'
        )
def run():
    """Console-script entry point: start the processor, passing the derived
    module identifier and the module docstring (presumably used as the CLI
    description by ConsumerProducer.start — confirm in trustgraph-core)."""
    Processor.start(module, __doc__)