Fold function_resolver into model_server (#103)

This commit is contained in:
Adil Hafeez 2024-10-01 09:13:50 -07:00 committed by GitHub
parent b0ce5eca93
commit f4395d39f9
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
24 changed files with 31 additions and 197 deletions

View file

@@ -171,7 +171,7 @@ static_resources:
- endpoint:
address:
socket_address:
address: function_resolver
address: model_server
port_value: 80
hostname: "arch_fc"
{% for _, cluster in arch_clusters.items() %}

View file

@@ -4,6 +4,5 @@ COPY config_generator/requirements.txt .
RUN pip install -r requirements.txt
COPY config_generator/config_generator.py .
COPY arch/envoy.template.yaml .
COPY arch/katanemo-config.yaml .
CMD ["python", "config_generator.py"]

View file

@@ -41,23 +41,10 @@ services:
volumes:
- ~/.cache/huggingface:/root/.cache/huggingface
- ./arch_config.yaml:/root/arch_config.yaml
function_resolver:
build:
context: ../../function_resolver
dockerfile: Dockerfile
ports:
- "18082:80"
healthcheck:
test: ["CMD", "curl" ,"http://localhost:80/healthz"]
interval: 5s
retries: 20
volumes:
- ~/.cache/huggingface:/root/.cache/huggingface
environment:
# use ollama endpoint that is hosted by host machine (no virtualization)
- OLLAMA_ENDPOINT=${OLLAMA_ENDPOINT:-host.docker.internal}
- OLLAMA_MODEL=Arch-Function-Calling-3B-Q4_K_M
# use ollama endpoint that is hosted by host machine (no virtualization)
# uncomment following line to use ollama endpoint that is hosted by docker
# - OLLAMA_ENDPOINT=ollama
# - OLLAMA_MODEL=Arch-Function-Calling-1.5B:Q4_K_M

View file

@@ -1,16 +0,0 @@
{
// Use IntelliSense to learn about possible attributes.
// Hover to view descriptions of existing attributes.
// For more information, visit: https://go.microsoft.com/fwlink/?linkid=830387
"version": "0.2.0",
"configurations": [
{
"name": "function resolver server",
"cwd": "${workspaceFolder}/app",
"type": "debugpy",
"request": "launch",
"module": "uvicorn",
"args": ["main:app","--reload", "--host", "0.0.0.0", "--port", "8001", "--log-config", "logger.yaml"],
}
]
}

View file

@@ -1,30 +0,0 @@
FROM python:3 AS base
#
# builder
#
FROM base AS builder
WORKDIR /src
COPY requirements.txt /src/
RUN pip install --prefix=/runtime --force-reinstall -r requirements.txt
COPY . /src
#
# output
#
FROM python:3-slim AS output
COPY --from=builder /runtime /usr/local
COPY /app /app
WORKDIR /app
RUN apt-get update && apt-get install -y \
curl \
&& rm -rf /var/lib/apt/lists/*
CMD ["uvicorn", "main:app", "--host", "0.0.0.0", "--port", "80", "--log-config", "logger.yaml"]

View file

@@ -1,4 +0,0 @@
fastapi
uvicorn
PyYAML
openai

View file

@@ -1,5 +0,0 @@
PORT="${PORT:-8001}"
echo localhost:$PORT/v1/chat/completions
curl -H "content-type: application/json" -XPOST localhost:$PORT/v1/chat/completions -d @test_payload.json

View file

@@ -1,5 +0,0 @@
PORT="${PORT:-8001}"
echo localhost:$PORT/bolt_fc_1b/v1/chat/completions
curl -v -H "content-type: application/json" -XPOST localhost:$PORT/bolt_fc_1b/v1/chat/completions -d @test_payload.json

View file

@@ -1 +0,0 @@
curl -H "content-type: application/json" -XPOST localhost:8001/v1/chat/completions -d @test_payload_missing_param.json

View file

@@ -1,33 +0,0 @@
{
"messages": [
{
"role": "user",
"content": "Find the area of a triangle with a base of 10 units and height of 5 units."
}
],
"tools": [
{
"name": "calculate_triangle_area",
"description": "Calculate the area of a triangle given its base and height.",
"parameters": {
"type": "dict",
"properties": {
"base": {
"type": "integer",
"description": "The base of the triangle.",
"required": true
},
"height": {
"type": "integer",
"description": "The height of the triangle.",
"required": true
},
"unit": {
"type": "string",
"description": "The unit of measure (defaults to 'units' if not specified)"
}
}
}
}
]
}

View file

@@ -1,32 +0,0 @@
{
"messages": [
{
"role": "user",
"content": "Find the area of a triangle"
}
],
"tools": [
{
"name": "calculate_triangle_area",
"description": "Calculate the area of a triangle given its base and height.",
"parameters": {
"type": "dict",
"properties": {
"base": {
"type": "integer",
"description": "The base of the triangle."
},
"height": {
"type": "integer",
"description": "The height of the triangle."
},
"unit": {
"type": "string",
"description": "The unit of measure (defaults to 'units' if not specified)"
}
},
"required": ["base", "height"]
}
}
]
}

View file

@@ -1,29 +0,0 @@
{
"messages": [
{
"role": "user",
"content": "how is the weather in San Francisco for next 5 days?"
}
],
"tools": [
{
"name": "weather_forecast",
"description": "This function resolver provides weather forecast information for a given city.",
"parameters": {
"type": "dict",
"properties": {
"days": {
"description": "The number of days for which the weather forecast is requested."
},
"units": {
"description": "The units in which the weather forecast is requested."
},
"city": {
"description": "The city for which the weather forecast is requested.",
"required": true
}
}
}
}
]
}

View file

@@ -12,10 +12,6 @@
"name": "model_server",
"path": "model_server"
},
{
"name": "function_resolver",
"path": "function_resolver"
},
{
"name": "chatbot_ui",
"path": "chatbot_ui"

View file

@@ -5,12 +5,11 @@
"version": "0.2.0",
"configurations": [
{
"name": "embedding server",
"cwd": "${workspaceFolder}/app",
"name": "model server",
"type": "debugpy",
"request": "launch",
"module": "uvicorn",
"args": ["main:app","--reload", "--port", "8000"],
"args": ["app.main:app","--reload", "--port", "8000"],
}
]
}

View file

@@ -31,7 +31,7 @@ ENV NER_MODELS="urchade/gliner_large-v2.1"
COPY --from=builder /runtime /usr/local
COPY /app /app
COPY ./ /app
WORKDIR /app
RUN apt-get update && apt-get install -y \
@@ -45,4 +45,4 @@ RUN apt-get update && apt-get install -y \
# RUN python install.py && \
# find /root/.cache/torch/sentence_transformers/ -name onnx -exec rm -rf {} +
CMD ["uvicorn", "main:app", "--host", "0.0.0.0", "--port", "80"]
CMD ["uvicorn", "app.main:app", "--host", "0.0.0.0", "--port", "80"]

View file

@@ -1,9 +1,9 @@
import json
import random
from fastapi import FastAPI, Response
from arch_handler import ArchHandler
from bolt_handler import BoltHandler
from common import ChatMessage
from app.arch_fc.arch_handler import ArchHandler
from app.arch_fc.bolt_handler import BoltHandler
from app.arch_fc.common import ChatMessage
import logging
from openai import OpenAI
import os
@@ -21,7 +21,7 @@ else:
logger.info(f"using model: {ollama_model}")
logger.info(f"using ollama endpoint: {ollama_endpoint}")
app = FastAPI()
# app = FastAPI()
client = OpenAI(
base_url='http://{}:11434/v1/'.format(ollama_endpoint),
@@ -30,14 +30,7 @@ client = OpenAI(
api_key='ollama',
)
@app.get("/healthz")
async def healthz():
return {
"status": "ok"
}
@app.post("/v1/chat/completions")
async def chat_completion(req: ChatMessage, res: Response):
logger.info("starting request")
tools_encoded = handler._format_system(req.tools)

View file

@@ -3,8 +3,8 @@ import sentence_transformers
from gliner import GLiNER
from transformers import AutoTokenizer, pipeline
import sqlite3
from employee_data_generator import generate_employee_data
from network_data_generator import (
from app.employee_data_generator import generate_employee_data
from app.network_data_generator import (
generate_device_data,
generate_interface_stats_data,
generate_flow_data,

View file

@@ -1,17 +1,20 @@
import os
from fastapi import FastAPI, Response, HTTPException
from pydantic import BaseModel
from load_models import (
from app.load_models import (
load_ner_models,
load_transformers,
load_guard_model,
load_zero_shot_models,
)
from utils import GuardHandler, split_text_into_chunks
from app.utils import GuardHandler, split_text_into_chunks
import torch
import yaml
import string
import time
import logging
from app.arch_fc.arch_fc import chat_completion as arch_fc_chat_completion, ChatMessage
import os.path
logging.basicConfig(
level=logging.INFO, format="%(asctime)s - %(levelname)s - %(message)s"
@@ -22,8 +25,11 @@ transformers = load_transformers()
ner_models = load_ner_models()
zero_shot_models = load_zero_shot_models()
with open("/root/arch_config.yaml", "r") as file:
config = yaml.safe_load(file)
config = {}
if os.path.exists("/root/arch_config.yaml"):
with open("/root/arch_config.yaml", "r") as file:
config = yaml.safe_load(file)
with open("guard_model_config.yaml") as f:
guard_model_config = yaml.safe_load(f)
@@ -231,6 +237,12 @@ async def zeroshot(req: ZeroShotRequest, res: Response):
}
@app.post("/v1/chat/completions")
async def chat_completion(req: ChatMessage, res: Response):
result = await arch_fc_chat_completion(req, res)
return result
'''
*****
Adding new functions to test the usecases - Sampreeth

View file

@@ -13,3 +13,6 @@ openvino
psutil
pandas
dateparser
openai
pandas
tf-keras