improve service names (#54)

- embedding-server => model_server
- public-types => public_types
- chatbot-ui => chatbot_ui
- function-calling => function_calling
This commit is contained in:
Adil Hafeez 2024-09-17 08:47:35 -07:00 committed by GitHub
parent 215f96e273
commit 060a0d665e
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
35 changed files with 54 additions and 52 deletions

16
model_server/.vscode/launch.json vendored Normal file
View file

@ -0,0 +1,16 @@
{
// Use IntelliSense to learn about possible attributes.
// Hover to view descriptions of existing attributes.
// For more information, visit: https://go.microsoft.com/fwlink/?linkid=830387
"version": "0.2.0",
"configurations": [
{
"name": "embedding server",
"cwd": "${workspaceFolder}/app",
"type": "debugpy",
"request": "launch",
"module": "uvicorn",
"args": ["main:app","--reload", "--port", "8000"],
}
]
}

43
model_server/Dockerfile Normal file
View file

@ -0,0 +1,43 @@
# Parent stage that the builder stage below derives from.
FROM python:3 AS base
#
# builder
#
# Installs the Python dependencies under the /runtime prefix; the output stage
# copies only that tree.
FROM base AS builder
WORKDIR /src
# requirements.txt is copied on its own before the rest of the sources.
# NOTE(review): presumably so the pip layer stays cached when only sources change - confirm.
COPY requirements.txt /src/
RUN pip install --prefix=/runtime --force-reinstall -r requirements.txt
COPY . /src
#
# output
#
FROM python:3-slim AS output
# specify list of models that will go into the image as a comma separated list
# following models have been tested to work with this image
# "sentence-transformers/all-MiniLM-L6-v2,sentence-transformers/all-mpnet-base-v2,thenlper/gte-base,thenlper/gte-large,thenlper/gte-small"
ENV MODELS="BAAI/bge-large-en-v1.5"
ENV NER_MODELS="urchade/gliner_large-v2.1"
# Bring the packages installed by the builder stage into the interpreter's prefix.
COPY --from=builder /runtime /usr/local
COPY /app /app
WORKDIR /app
# NOTE(review): curl is presumably installed for container health checks - confirm.
RUN apt-get update && apt-get install -y \
curl \
&& rm -rf /var/lib/apt/lists/*
# The model pre-download step below is commented out for now because we do not
# want to download the models every time the image is built. Instead, the host
# cache is mounted into the container so the models are not downloaded each time;
# see the docker-compose file for more details.
# RUN python install.py && \
# find /root/.cache/torch/sentence_transformers/ -name onnx -exec rm -rf {} +
# Serve the FastAPI app (main:app) on all interfaces, port 80.
CMD ["uvicorn", "main:app", "--host", "0.0.0.0", "--port", "80"]

View file

@ -0,0 +1,6 @@
from load_models import load_transformers, load_ner_models
# Instantiate every configured embedding and NER model once. The returned
# dicts are discarded, so this script is run only for the loaders' side effects.
# NOTE(review): presumably this warms the local model download cache - confirm.
# NOTE(review): load_zero_shot_models is not called here - confirm this is intentional.
print('installing transformers')
load_transformers()
print('installing ner models')
load_ner_models()

View file

@ -0,0 +1,28 @@
import os
import sentence_transformers
from gliner import GLiNER
from transformers import pipeline
def load_transformers(models=os.getenv("MODELS", "BAAI/bge-large-en-v1.5")):
    """Load a SentenceTransformer for each model named in *models*.

    Args:
        models: Comma-separated model names. The default is read from the
            ``MODELS`` environment variable once, when this function is
            defined (i.e. at module import), falling back to
            ``"BAAI/bge-large-en-v1.5"``.

    Returns:
        A dict mapping each model name to its loaded
        ``sentence_transformers.SentenceTransformer`` instance. The dict is
        empty when *models* contains no names.
    """
    # Tolerate spaces around commas and stray or trailing commas ("a, b,") so a
    # loosely formatted list does not produce bogus names such as " b" or "".
    names = [name.strip() for name in models.split(",") if name.strip()]
    return {
        name: sentence_transformers.SentenceTransformer(name) for name in names
    }
def load_ner_models(models=os.getenv("NER_MODELS", "urchade/gliner_large-v2.1")):
    """Load a GLiNER model for each model named in *models*.

    Args:
        models: Comma-separated model names. The default is read from the
            ``NER_MODELS`` environment variable once, when this function is
            defined (i.e. at module import), falling back to
            ``"urchade/gliner_large-v2.1"``.

    Returns:
        A dict mapping each model name to the object returned by
        ``GLiNER.from_pretrained`` for it. The dict is empty when *models*
        contains no names.
    """
    # Tolerate spaces around commas and stray or trailing commas ("a, b,") so a
    # loosely formatted list does not produce bogus names such as " b" or "".
    names = [name.strip() for name in models.split(",") if name.strip()]
    return {name: GLiNER.from_pretrained(name) for name in names}
def load_zero_shot_models(models=os.getenv("ZERO_SHOT_MODELS", "tasksource/deberta-base-long-nli")):
    """Build a zero-shot-classification pipeline for each model in *models*.

    Args:
        models: Comma-separated model names. The default is read from the
            ``ZERO_SHOT_MODELS`` environment variable once, when this function
            is defined (i.e. at module import), falling back to
            ``"tasksource/deberta-base-long-nli"``.

    Returns:
        A dict mapping each model name to the pipeline created by
        ``pipeline("zero-shot-classification", model=name)``. The dict is
        empty when *models* contains no names.
    """
    # Tolerate spaces around commas and stray or trailing commas ("a, b,") so a
    # loosely formatted list does not produce bogus names such as " b" or "".
    names = [name.strip() for name in models.split(",") if name.strip()]
    return {
        name: pipeline("zero-shot-classification", model=name) for name in names
    }

145
model_server/app/main.py Normal file
View file

@ -0,0 +1,145 @@
import random
from fastapi import FastAPI, Response, HTTPException
from pydantic import BaseModel
from load_models import load_ner_models, load_transformers, load_zero_shot_models
from datetime import date, timedelta
import string
# Load every configured model once, at import time, using each loader's default
# model list. The request handlers below look models up in these objects by name.
transformers = load_transformers()
ner_models = load_ner_models()
zero_shot_models = load_zero_shot_models()
# FastAPI application object that the route decorators below register against.
app = FastAPI()
# Request body accepted by POST /embeddings.
class EmbeddingRequest(BaseModel):
    # Text to embed; the handler encodes it as a single-element batch.
    input: str
    # Name of the embedding model to use; must be a key of `transformers`.
    model: str
# Health-check endpoint: always responds with the same constant status payload.
@app.get("/healthz")
async def healthz():
    status_payload = {"status": "ok"}
    return status_payload
# Lists the loaded embedding models. Only the keys of `transformers` are
# reported; NER and zero-shot models are not included.
@app.get("/models")
async def models():
    model_entries = [
        {"id": model_name, "object": "model"} for model_name in transformers
    ]
    return {"data": model_entries, "object": "list"}
# Computes the embedding of `req.input` with the requested model.
# Responds with HTTP 400 when `req.model` is not a loaded embedding model.
@app.post("/embeddings")
async def embedding(req: EmbeddingRequest, res: Response):
    if req.model not in transformers:
        raise HTTPException(status_code=400, detail="unknown model: " + req.model)

    encoder = transformers[req.model]
    # The input is encoded as a one-element batch; each resulting vector
    # becomes one entry, indexed by its position in the batch.
    vectors = encoder.encode([req.input]).tolist()
    data = [
        {"object": "embedding", "embedding": vector, "index": position}
        for position, vector in enumerate(vectors)
    ]

    return {
        "data": data,
        "model": req.model,
        "object": "list",
        # Token usage is not measured; both counters are always reported as 0.
        "usage": {"prompt_tokens": 0, "total_tokens": 0},
    }
# Request body accepted by POST /ner.
class NERRequest(BaseModel):
    # Text passed to the model's predict_entities call.
    input: str
    # Labels passed alongside the text to predict_entities.
    labels: list[str]
    # Name of the NER model to use; must be a key of `ner_models`.
    model: str
# Runs entity prediction on `req.input` for the given labels.
# Responds with HTTP 400 when `req.model` is not a loaded NER model.
@app.post("/ner")
async def ner(req: NERRequest, res: Response):
    try:
        extractor = ner_models[req.model]
    except KeyError:
        raise HTTPException(status_code=400, detail="unknown model: " + req.model) from None

    return {
        "data": extractor.predict_entities(req.input, req.labels),
        "model": req.model,
        "object": "list",
    }
# Request body accepted by POST /zeroshot.
class ZeroShotRequest(BaseModel):
    # Text to classify.
    input: str
    # Candidate labels; the handler strips punctuation and lower-cases them
    # before classification, then maps results back to these original strings.
    labels: list[str]
    # Name of the zero-shot model to use; must be a key of `zero_shot_models`.
    model: str
def remove_punctuations(s, lower=True):
    """Replace ASCII punctuation in *s* with spaces and collapse whitespace.

    Every character in ``string.punctuation`` becomes a space, runs of
    whitespace are squeezed to single spaces, and leading/trailing whitespace
    is dropped. The result is lower-cased unless *lower* is false.
    """
    punctuation_to_space = {ord(mark): " " for mark in string.punctuation}
    words = s.translate(punctuation_to_space).split()
    cleaned = " ".join(words)
    return cleaned.lower() if lower else cleaned
@app.post("/zeroshot")
async def zeroshot(req: ZeroShotRequest, res: Response):
    """Score the input text against the caller's candidate labels.

    Labels are stripped of punctuation and lower-cased before being handed to
    the classifier; the response reports scores under the original label
    strings. Responds with HTTP 400 when the model is unknown or no labels
    are supplied.
    """
    if req.model not in zero_shot_models:
        raise HTTPException(status_code=400, detail="unknown model: " + req.model)
    if not req.labels:
        # Without candidates there is nothing to rank. Reject the request
        # explicitly instead of letting it fail as an unhandled server error
        # further down (in the classifier call or the [0] lookup).
        raise HTTPException(status_code=400, detail="labels must not be empty")

    classifier = zero_shot_models[req.model]
    normalized_labels = [remove_punctuations(label) for label in req.labels]
    result = classifier(
        req.input, candidate_labels=normalized_labels, multi_label=True
    )

    # Map each normalized label back to the string the caller sent. If two
    # labels normalize to the same text, the later one wins.
    label_map = dict(zip(normalized_labels, req.labels))
    ranked_labels = [label_map[label] for label in result["labels"]]
    final_scores = dict(zip(ranked_labels, result["scores"]))

    # The first label returned by the classifier is treated as the prediction.
    predicted_class = ranked_labels[0]
    return {
        "predicted_class": predicted_class,
        "predicted_class_score": final_scores[predicted_class],
        "scores": final_scores,
        "model": req.model,
    }
# Request body accepted by POST /weather.
class WeatherRequest(BaseModel):
    # City name; echoed back unchanged in the forecast response.
    city: str
@app.post("/weather")
async def weather(req: WeatherRequest, res: Response):
    """Return a randomly generated 7-day forecast for the requested city.

    The temperatures are random numbers, not real weather data: each day gets
    a minimum in [50, 90) and a maximum 5 to 19 degrees above it, reported
    with unit "F". Dates start at today's local date.
    """
    # Read the clock once so all seven dates are offsets from the same day,
    # even if the request straddles midnight.
    start_day = date.today()

    daily_temperatures = []
    for offset in range(7):
        min_temp = random.randrange(50, 90)
        max_temp = random.randrange(min_temp + 5, min_temp + 20)
        daily_temperatures.append({
            "date": str(start_day + timedelta(days=offset)),
            "temperature": {
                "min": min_temp,
                "max": max_temp,
            },
        })

    return {
        "city": req.city,
        "temperature": daily_temperatures,
        "unit": "F",
    }

View file

@ -0,0 +1,6 @@
# TODO: pin versions
fastapi
sentence-transformers
torch
uvicorn
gliner