mirror of
https://github.com/katanemo/plano.git
synced 2026-06-26 15:39:40 +02:00
improve service names (#54)
- embedding-server => model_server - public-types => public_types - chatbot-ui => chatbot_ui - function-calling => function_calling
This commit is contained in:
parent
215f96e273
commit
060a0d665e
35 changed files with 54 additions and 52 deletions
16
model_server/.vscode/launch.json
vendored
Normal file
16
model_server/.vscode/launch.json
vendored
Normal file
|
|
@ -0,0 +1,16 @@
|
|||
{
|
||||
// Use IntelliSense to learn about possible attributes.
|
||||
// Hover to view descriptions of existing attributes.
|
||||
// For more information, visit: https://go.microsoft.com/fwlink/?linkid=830387
|
||||
"version": "0.2.0",
|
||||
"configurations": [
|
||||
{
|
||||
"name": "embedding server",
|
||||
"cwd": "${workspaceFolder}/app",
|
||||
"type": "debugpy",
|
||||
"request": "launch",
|
||||
"module": "uvicorn",
|
||||
"args": ["main:app","--reload", "--port", "8000"],
|
||||
}
|
||||
]
|
||||
}
|
||||
43
model_server/Dockerfile
Normal file
43
model_server/Dockerfile
Normal file
|
|
@ -0,0 +1,43 @@
|
|||
FROM python:3 AS base
|
||||
|
||||
#
|
||||
# builder
|
||||
#
|
||||
FROM base AS builder
|
||||
|
||||
WORKDIR /src
|
||||
|
||||
COPY requirements.txt /src/
|
||||
RUN pip install --prefix=/runtime --force-reinstall -r requirements.txt
|
||||
|
||||
COPY . /src
|
||||
|
||||
#
|
||||
# output
|
||||
#
|
||||
|
||||
FROM python:3-slim AS output
|
||||
|
||||
# specify list of models that will go into the image as a comma separated list
|
||||
# following models have been tested to work with this image
|
||||
# "sentence-transformers/all-MiniLM-L6-v2,sentence-transformers/all-mpnet-base-v2,thenlper/gte-base,thenlper/gte-large,thenlper/gte-small"
|
||||
ENV MODELS="BAAI/bge-large-en-v1.5"
|
||||
ENV NER_MODELS="urchade/gliner_large-v2.1"
|
||||
|
||||
COPY --from=builder /runtime /usr/local
|
||||
|
||||
COPY /app /app
|
||||
WORKDIR /app
|
||||
|
||||
RUN apt-get update && apt-get install -y \
|
||||
curl \
|
||||
&& rm -rf /var/lib/apt/lists/*
|
||||
|
||||
# commented out for now, as we don't want to download the models every time we build the image
|
||||
# we will mount host cache to docker image to avoid downloading the model every time
|
||||
# see docker-compose file for more details
|
||||
|
||||
# RUN python install.py && \
|
||||
# find /root/.cache/torch/sentence_transformers/ -name onnx -exec rm -rf {} +
|
||||
|
||||
CMD ["uvicorn", "main:app", "--host", "0.0.0.0", "--port", "80"]
|
||||
6
model_server/app/install.py
Normal file
6
model_server/app/install.py
Normal file
|
|
@ -0,0 +1,6 @@
|
|||
from load_models import load_transformers, load_ner_models
|
||||
|
||||
# Run each loader once, announcing it first, so the models are fetched ahead
# of serving. Order matches the original script: transformers, then NER.
for banner, loader in (
    ('installing transformers', load_transformers),
    ('installing ner models', load_ner_models),
):
    print(banner)
    loader()
|
||||
28
model_server/app/load_models.py
Normal file
28
model_server/app/load_models.py
Normal file
|
|
@ -0,0 +1,28 @@
|
|||
import os
|
||||
import sentence_transformers
|
||||
from gliner import GLiNER
|
||||
from transformers import pipeline
|
||||
|
||||
def load_transformers(models=os.getenv("MODELS", "BAAI/bge-large-en-v1.5")):
    """Load one SentenceTransformer per requested model name.

    Args:
        models: Comma-separated model names. The default is read from the
            ``MODELS`` environment variable when this function is defined
            (i.e. at import time), falling back to "BAAI/bge-large-en-v1.5".

    Returns:
        dict mapping each model name to its loaded SentenceTransformer.
        Empty when ``models`` contains no names.
    """
    transformers = {}

    for raw_name in models.split(','):
        # Tolerate "a, b", trailing commas and empty values: surrounding
        # whitespace is not part of the name and blank entries are skipped
        # rather than passed to the loader as a bogus model id.
        model = raw_name.strip()
        if not model:
            continue
        transformers[model] = sentence_transformers.SentenceTransformer(model)

    return transformers
|
||||
|
||||
def load_ner_models(models=os.getenv("NER_MODELS", "urchade/gliner_large-v2.1")):
    """Load one GLiNER model per requested model name.

    Args:
        models: Comma-separated model names. The default is read from the
            ``NER_MODELS`` environment variable when this function is defined
            (i.e. at import time), falling back to "urchade/gliner_large-v2.1".

    Returns:
        dict mapping each model name to the object returned by
        ``GLiNER.from_pretrained``. Empty when ``models`` contains no names.
    """
    ner_models = {}

    for raw_name in models.split(','):
        # Tolerate "a, b", trailing commas and empty values: surrounding
        # whitespace is not part of the name and blank entries are skipped
        # rather than passed to the loader as a bogus model id.
        model = raw_name.strip()
        if not model:
            continue
        ner_models[model] = GLiNER.from_pretrained(model)

    return ner_models
|
||||
|
||||
def load_zero_shot_models(models=os.getenv("ZERO_SHOT_MODELS", "tasksource/deberta-base-long-nli")):
    """Build one zero-shot-classification pipeline per requested model name.

    Args:
        models: Comma-separated model names. The default is read from the
            ``ZERO_SHOT_MODELS`` environment variable when this function is
            defined (i.e. at import time), falling back to
            "tasksource/deberta-base-long-nli".

    Returns:
        dict mapping each model name to its "zero-shot-classification"
        pipeline. Empty when ``models`` contains no names.
    """
    zero_shot_models = {}

    for raw_name in models.split(','):
        # Tolerate "a, b", trailing commas and empty values: surrounding
        # whitespace is not part of the name and blank entries are skipped
        # rather than passed to the loader as a bogus model id.
        model = raw_name.strip()
        if not model:
            continue
        zero_shot_models[model] = pipeline("zero-shot-classification", model=model)

    return zero_shot_models
|
||||
145
model_server/app/main.py
Normal file
145
model_server/app/main.py
Normal file
|
|
@ -0,0 +1,145 @@
|
|||
import random
|
||||
from fastapi import FastAPI, Response, HTTPException
|
||||
from pydantic import BaseModel
|
||||
from load_models import load_ner_models, load_transformers, load_zero_shot_models
|
||||
from datetime import date, timedelta
|
||||
import string
|
||||
|
||||
transformers = load_transformers()
|
||||
ner_models = load_ner_models()
|
||||
zero_shot_models = load_zero_shot_models()
|
||||
|
||||
app = FastAPI()
|
||||
|
||||
class EmbeddingRequest(BaseModel):
    """Request body accepted by the POST /embeddings endpoint."""

    # Text to encode.
    input: str
    # Key into the loaded embedding models; unknown names are rejected
    # by the endpoint with a 400.
    model: str
|
||||
|
||||
@app.get("/healthz")
async def healthz():
    """Health probe: unconditionally reports an "ok" status."""
    return {"status": "ok"}
|
||||
|
||||
@app.get("/models")
async def models():
    """List the embedding models currently loaded in ``transformers``.

    Returns a dict with a "data" list holding one ``{"id", "object"}`` entry
    per loaded model name, plus an "object" field set to "list".
    """
    return {
        "data": [{"id": name, "object": "model"} for name in transformers],
        "object": "list",
    }
|
||||
|
||||
@app.post("/embeddings")
async def embedding(req: EmbeddingRequest, res: Response):
    """Encode ``req.input`` with the requested embedding model.

    Raises:
        HTTPException: 400 when ``req.model`` is not a loaded model.

    Returns:
        dict with one "embedding" entry per encoded input (a single entry,
        since one input string is encoded), the model name, and a usage
        section whose token counts are always reported as 0.
    """
    if req.model not in transformers:
        raise HTTPException(status_code=400, detail="unknown model: " + req.model)

    # encode() receives a one-element batch; tolist() makes the vectors
    # plain lists so they can be placed in the response.
    vectors = transformers[req.model].encode([req.input]).tolist()

    return {
        "data": [
            {"object": "embedding", "embedding": vector, "index": position}
            for position, vector in enumerate(vectors)
        ],
        "model": req.model,
        "object": "list",
        "usage": {"prompt_tokens": 0, "total_tokens": 0},
    }
|
||||
|
||||
class NERRequest(BaseModel):
    """Request body accepted by the POST /ner endpoint."""

    # Text to search for entities.
    input: str
    # Entity labels handed to the model's predict_entities call.
    labels: list[str]
    # Key into the loaded NER models; unknown names are rejected
    # by the endpoint with a 400.
    model: str
|
||||
|
||||
|
||||
@app.post("/ner")
async def ner(req: NERRequest, res: Response):
    """Run entity prediction on ``req.input`` for the given labels.

    Raises:
        HTTPException: 400 when ``req.model`` is not a loaded NER model.

    Returns:
        dict carrying the model's predicted entities under "data", the model
        name, and an "object" field set to "list".
    """
    if req.model not in ner_models:
        raise HTTPException(status_code=400, detail="unknown model: " + req.model)

    return {
        "data": ner_models[req.model].predict_entities(req.input, req.labels),
        "model": req.model,
        "object": "list",
    }
|
||||
|
||||
class ZeroShotRequest(BaseModel):
    """Request body accepted by the POST /zeroshot endpoint."""

    # Text to classify.
    input: str
    # Candidate class labels; the endpoint normalizes them before
    # classification and reports results under the original spelling.
    labels: list[str]
    # Key into the loaded zero-shot models; unknown names are rejected
    # by the endpoint with a 400.
    model: str
|
||||
|
||||
|
||||
def remove_punctuations(s, lower=True):
    """Replace ASCII punctuation with spaces and collapse whitespace.

    Args:
        s: Text to normalize.
        lower: When true (the default), the result is also lower-cased.

    Returns:
        The text with every ``string.punctuation`` character turned into a
        space, runs of whitespace squeezed to single spaces, and leading and
        trailing whitespace removed.
    """
    punctuation_to_space = str.maketrans(dict.fromkeys(string.punctuation, " "))
    # split()/join drops leading/trailing whitespace and squeezes inner runs.
    collapsed = " ".join(s.translate(punctuation_to_space).split())
    return collapsed.lower() if lower else collapsed
|
||||
|
||||
|
||||
@app.post("/zeroshot")
async def zeroshot(req: ZeroShotRequest, res: Response):
    """Classify ``req.input`` against ``req.labels`` with a zero-shot model.

    Labels are normalized with ``remove_punctuations`` before being passed to
    the classifier; scores are reported under the caller's original labels.

    Raises:
        HTTPException: 400 when ``req.model`` is not a loaded zero-shot model
            or when ``req.labels`` is empty.

    Returns:
        dict with the top label ("predicted_class"), its score, the score of
        every label ("scores"), and the model name.
    """
    if req.model not in zero_shot_models:
        raise HTTPException(status_code=400, detail="unknown model: " + req.model)

    # Without candidate labels there is nothing to rank, and indexing the
    # first predicted label below would fail; reject the request up front
    # instead of surfacing an unhandled server error.
    if not req.labels:
        raise HTTPException(status_code=400, detail="labels must not be empty")

    classifier = zero_shot_models[req.model]
    labels_without_punctuations = [remove_punctuations(label) for label in req.labels]
    predicted_classes = classifier(
        req.input,
        candidate_labels=labels_without_punctuations,
        multi_label=True,
    )

    # Map each normalized label back to the label the caller sent.
    # NOTE(review): labels that normalize to the same text share one entry
    # here (the last original wins) — confirm whether callers rely on that.
    label_map = dict(zip(labels_without_punctuations, req.labels))

    orig_map = [label_map[label] for label in predicted_classes["labels"]]
    final_scores = dict(zip(orig_map, predicted_classes["scores"]))
    # The first returned label is reported as the predicted class.
    predicted_class = label_map[predicted_classes["labels"][0]]

    return {
        "predicted_class": predicted_class,
        "predicted_class_score": final_scores[predicted_class],
        "scores": final_scores,
        "model": req.model,
    }
|
||||
|
||||
|
||||
class WeatherRequest(BaseModel):
    """Request body accepted by the POST /weather endpoint."""

    # City name; echoed back unchanged in the forecast response.
    city: str
|
||||
|
||||
|
||||
@app.post("/weather")
async def weather(req: WeatherRequest, res: Response):
    """Return a randomly generated 7-day forecast for ``req.city``.

    The temperatures are not real data: each day's minimum is drawn from
    [50, 90) and its maximum from [min + 5, min + 20). Dates start at today
    and advance one day per entry. The unit is reported as "F".
    """
    daily = []
    for offset in range(7):
        low = random.randrange(50, 90)
        high = random.randrange(low + 5, low + 20)
        daily.append({
            "date": str(date.today() + timedelta(days=offset)),
            "temperature": {"min": low, "max": high},
        })

    return {
        "city": req.city,
        "temperature": daily,
        "unit": "F",
    }
|
||||
6
model_server/requirements.txt
Normal file
6
model_server/requirements.txt
Normal file
|
|
@ -0,0 +1,6 @@
|
|||
#TODO: pin versions
|
||||
fastapi
|
||||
sentence-transformers
|
||||
torch
|
||||
uvicorn
|
||||
gliner
|
||||
Loading…
Add table
Add a link
Reference in a new issue