mirror of
https://github.com/katanemo/plano.git
synced 2026-04-25 00:36:34 +02:00
Fold function_resolver into model_server (#103)
This commit is contained in:
parent
b0ce5eca93
commit
f4395d39f9
24 changed files with 31 additions and 197 deletions
|
|
@ -171,7 +171,7 @@ static_resources:
|
|||
- endpoint:
|
||||
address:
|
||||
socket_address:
|
||||
address: function_resolver
|
||||
address: model_server
|
||||
port_value: 80
|
||||
hostname: "arch_fc"
|
||||
{% for _, cluster in arch_clusters.items() %}
|
||||
|
|
|
|||
|
|
@ -4,6 +4,5 @@ COPY config_generator/requirements.txt .
|
|||
RUN pip install -r requirements.txt
|
||||
COPY config_generator/config_generator.py .
|
||||
COPY arch/envoy.template.yaml .
|
||||
COPY arch/katanemo-config.yaml .
|
||||
|
||||
CMD ["python", "config_generator.py"]
|
||||
|
|
|
|||
|
|
@ -41,23 +41,10 @@ services:
|
|||
volumes:
|
||||
- ~/.cache/huggingface:/root/.cache/huggingface
|
||||
- ./arch_config.yaml:/root/arch_config.yaml
|
||||
|
||||
function_resolver:
|
||||
build:
|
||||
context: ../../function_resolver
|
||||
dockerfile: Dockerfile
|
||||
ports:
|
||||
- "18082:80"
|
||||
healthcheck:
|
||||
test: ["CMD", "curl" ,"http://localhost:80/healthz"]
|
||||
interval: 5s
|
||||
retries: 20
|
||||
volumes:
|
||||
- ~/.cache/huggingface:/root/.cache/huggingface
|
||||
environment:
|
||||
# use ollama endpoint that is hosted by host machine (no virtualization)
|
||||
- OLLAMA_ENDPOINT=${OLLAMA_ENDPOINT:-host.docker.internal}
|
||||
- OLLAMA_MODEL=Arch-Function-Calling-3B-Q4_K_M
|
||||
# use ollama endpoint that is hosted by host machine (no virtualization)
|
||||
# uncomment following line to use ollama endpoint that is hosted by docker
|
||||
# - OLLAMA_ENDPOINT=ollama
|
||||
# - OLLAMA_MODEL=Arch-Function-Calling-1.5B:Q4_K_M
|
||||
|
|
|
|||
16
function_resolver/.vscode/launch.json
vendored
16
function_resolver/.vscode/launch.json
vendored
|
|
@ -1,16 +0,0 @@
|
|||
{
|
||||
// Use IntelliSense to learn about possible attributes.
|
||||
// Hover to view descriptions of existing attributes.
|
||||
// For more information, visit: https://go.microsoft.com/fwlink/?linkid=830387
|
||||
"version": "0.2.0",
|
||||
"configurations": [
|
||||
{
|
||||
"name": "function resolver server",
|
||||
"cwd": "${workspaceFolder}/app",
|
||||
"type": "debugpy",
|
||||
"request": "launch",
|
||||
"module": "uvicorn",
|
||||
"args": ["main:app","--reload", "--host", "0.0.0.0", "--port", "8001", "--log-config", "logger.yaml"],
|
||||
}
|
||||
]
|
||||
}
|
||||
|
|
@ -1,30 +0,0 @@
|
|||
FROM python:3 AS base
|
||||
|
||||
#
|
||||
# builder
|
||||
#
|
||||
FROM base AS builder
|
||||
|
||||
WORKDIR /src
|
||||
|
||||
COPY requirements.txt /src/
|
||||
RUN pip install --prefix=/runtime --force-reinstall -r requirements.txt
|
||||
|
||||
COPY . /src
|
||||
|
||||
#
|
||||
# output
|
||||
#
|
||||
|
||||
FROM python:3-slim AS output
|
||||
|
||||
COPY --from=builder /runtime /usr/local
|
||||
|
||||
COPY /app /app
|
||||
WORKDIR /app
|
||||
|
||||
RUN apt-get update && apt-get install -y \
|
||||
curl \
|
||||
&& rm -rf /var/lib/apt/lists/*
|
||||
|
||||
CMD ["uvicorn", "main:app", "--host", "0.0.0.0", "--port", "80", "--log-config", "logger.yaml"]
|
||||
|
|
@ -1,4 +0,0 @@
|
|||
fastapi
|
||||
uvicorn
|
||||
PyYAML
|
||||
openai
|
||||
|
|
@ -1,5 +0,0 @@
|
|||
PORT="${PORT:-8001}"
|
||||
|
||||
echo localhost:$PORT/v1/chat/completions
|
||||
|
||||
curl -H "content-type: application/json" -XPOST localhost:$PORT/v1/chat/completions -d @test_payload.json
|
||||
|
|
@ -1,5 +0,0 @@
|
|||
PORT="${PORT:-8001}"
|
||||
|
||||
echo localhost:$PORT/bolt_fc_1b/v1/chat/completions
|
||||
|
||||
curl -v -H "content-type: application/json" -XPOST localhost:$PORT/bolt_fc_1b/v1/chat/completions -d @test_payload.json
|
||||
|
|
@ -1 +0,0 @@
|
|||
curl -H "content-type: application/json" -XPOST localhost:8001/v1/chat/completions -d @test_payload_missing_param.json
|
||||
|
|
@ -1,33 +0,0 @@
|
|||
{
|
||||
"messages": [
|
||||
{
|
||||
"role": "user",
|
||||
"content": "Find the area of a triangle with a base of 10 units and height of 5 units."
|
||||
}
|
||||
],
|
||||
"tools": [
|
||||
{
|
||||
"name": "calculate_triangle_area",
|
||||
"description": "Calculate the area of a triangle given its base and height.",
|
||||
"parameters": {
|
||||
"type": "dict",
|
||||
"properties": {
|
||||
"base": {
|
||||
"type": "integer",
|
||||
"description": "The base of the triangle.",
|
||||
"required": true
|
||||
},
|
||||
"height": {
|
||||
"type": "integer",
|
||||
"description": "The height of the triangle.",
|
||||
"required": true
|
||||
},
|
||||
"unit": {
|
||||
"type": "string",
|
||||
"description": "The unit of measure (defaults to 'units' if not specified)"
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
]
|
||||
}
|
||||
|
|
@ -1,32 +0,0 @@
|
|||
{
|
||||
"messages": [
|
||||
{
|
||||
"role": "user",
|
||||
"content": "Find the area of a triangle"
|
||||
}
|
||||
],
|
||||
"tools": [
|
||||
{
|
||||
"name": "calculate_triangle_area",
|
||||
"description": "Calculate the area of a triangle given its base and height.",
|
||||
"parameters": {
|
||||
"type": "dict",
|
||||
"properties": {
|
||||
"base": {
|
||||
"type": "integer",
|
||||
"description": "The base of the triangle."
|
||||
},
|
||||
"height": {
|
||||
"type": "integer",
|
||||
"description": "The height of the triangle."
|
||||
},
|
||||
"unit": {
|
||||
"type": "string",
|
||||
"description": "The unit of measure (defaults to 'units' if not specified)"
|
||||
}
|
||||
},
|
||||
"required": ["base", "height"]
|
||||
}
|
||||
}
|
||||
]
|
||||
}
|
||||
|
|
@ -1,29 +0,0 @@
|
|||
{
|
||||
"messages": [
|
||||
{
|
||||
"role": "user",
|
||||
"content": "how is the weather in San Francisco for next 5 days?"
|
||||
}
|
||||
],
|
||||
"tools": [
|
||||
{
|
||||
"name": "weather_forecast",
|
||||
"description": "This function resolver provides weather forecast information for a given city.",
|
||||
"parameters": {
|
||||
"type": "dict",
|
||||
"properties": {
|
||||
"days": {
|
||||
"description": "The number of days for which the weather forecast is requested."
|
||||
},
|
||||
"units": {
|
||||
"description": "The units in which the weather forecast is requested."
|
||||
},
|
||||
"city": {
|
||||
"description": "The city for which the weather forecast is requested.",
|
||||
"required": true
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
]
|
||||
}
|
||||
|
|
@ -12,10 +12,6 @@
|
|||
"name": "model_server",
|
||||
"path": "model_server"
|
||||
},
|
||||
{
|
||||
"name": "function_resolver",
|
||||
"path": "function_resolver"
|
||||
},
|
||||
{
|
||||
"name": "chatbot_ui",
|
||||
"path": "chatbot_ui"
|
||||
|
|
|
|||
5
model_server/.vscode/launch.json
vendored
5
model_server/.vscode/launch.json
vendored
|
|
@ -5,12 +5,11 @@
|
|||
"version": "0.2.0",
|
||||
"configurations": [
|
||||
{
|
||||
"name": "embedding server",
|
||||
"cwd": "${workspaceFolder}/app",
|
||||
"name": "model server",
|
||||
"type": "debugpy",
|
||||
"request": "launch",
|
||||
"module": "uvicorn",
|
||||
"args": ["main:app","--reload", "--port", "8000"],
|
||||
"args": ["app.main:app","--reload", "--port", "8000"],
|
||||
}
|
||||
]
|
||||
}
|
||||
|
|
|
|||
|
|
@ -31,7 +31,7 @@ ENV NER_MODELS="urchade/gliner_large-v2.1"
|
|||
|
||||
COPY --from=builder /runtime /usr/local
|
||||
|
||||
COPY /app /app
|
||||
COPY ./ /app
|
||||
WORKDIR /app
|
||||
|
||||
RUN apt-get update && apt-get install -y \
|
||||
|
|
@ -45,4 +45,4 @@ RUN apt-get update && apt-get install -y \
|
|||
# RUN python install.py && \
|
||||
# find /root/.cache/torch/sentence_transformers/ -name onnx -exec rm -rf {} +
|
||||
|
||||
CMD ["uvicorn", "main:app", "--host", "0.0.0.0", "--port", "80"]
|
||||
CMD ["uvicorn", "app.main:app", "--host", "0.0.0.0", "--port", "80"]
|
||||
|
|
|
|||
|
|
@ -1,9 +1,9 @@
|
|||
import json
|
||||
import random
|
||||
from fastapi import FastAPI, Response
|
||||
from arch_handler import ArchHandler
|
||||
from bolt_handler import BoltHandler
|
||||
from common import ChatMessage
|
||||
from app.arch_fc.arch_handler import ArchHandler
|
||||
from app.arch_fc.bolt_handler import BoltHandler
|
||||
from app.arch_fc.common import ChatMessage
|
||||
import logging
|
||||
from openai import OpenAI
|
||||
import os
|
||||
|
|
@ -21,7 +21,7 @@ else:
|
|||
logger.info(f"using model: {ollama_model}")
|
||||
logger.info(f"using ollama endpoint: {ollama_endpoint}")
|
||||
|
||||
app = FastAPI()
|
||||
# app = FastAPI()
|
||||
|
||||
client = OpenAI(
|
||||
base_url='http://{}:11434/v1/'.format(ollama_endpoint),
|
||||
|
|
@ -30,14 +30,7 @@ client = OpenAI(
|
|||
api_key='ollama',
|
||||
)
|
||||
|
||||
@app.get("/healthz")
|
||||
async def healthz():
|
||||
return {
|
||||
"status": "ok"
|
||||
}
|
||||
|
||||
|
||||
@app.post("/v1/chat/completions")
|
||||
async def chat_completion(req: ChatMessage, res: Response):
|
||||
logger.info("starting request")
|
||||
tools_encoded = handler._format_system(req.tools)
|
||||
|
|
@ -3,8 +3,8 @@ import sentence_transformers
|
|||
from gliner import GLiNER
|
||||
from transformers import AutoTokenizer, pipeline
|
||||
import sqlite3
|
||||
from employee_data_generator import generate_employee_data
|
||||
from network_data_generator import (
|
||||
from app.employee_data_generator import generate_employee_data
|
||||
from app.network_data_generator import (
|
||||
generate_device_data,
|
||||
generate_interface_stats_data,
|
||||
generate_flow_data,
|
||||
|
|
|
|||
|
|
@ -1,17 +1,20 @@
|
|||
import os
|
||||
from fastapi import FastAPI, Response, HTTPException
|
||||
from pydantic import BaseModel
|
||||
from load_models import (
|
||||
from app.load_models import (
|
||||
load_ner_models,
|
||||
load_transformers,
|
||||
load_guard_model,
|
||||
load_zero_shot_models,
|
||||
)
|
||||
from utils import GuardHandler, split_text_into_chunks
|
||||
from app.utils import GuardHandler, split_text_into_chunks
|
||||
import torch
|
||||
import yaml
|
||||
import string
|
||||
import time
|
||||
import logging
|
||||
from app.arch_fc.arch_fc import chat_completion as arch_fc_chat_completion, ChatMessage
|
||||
import os.path
|
||||
|
||||
logging.basicConfig(
|
||||
level=logging.INFO, format="%(asctime)s - %(levelname)s - %(message)s"
|
||||
|
|
@ -22,8 +25,11 @@ transformers = load_transformers()
|
|||
ner_models = load_ner_models()
|
||||
zero_shot_models = load_zero_shot_models()
|
||||
|
||||
with open("/root/arch_config.yaml", "r") as file:
|
||||
config = yaml.safe_load(file)
|
||||
config = {}
|
||||
|
||||
if os.path.exists("/root/arch_config.yaml"):
|
||||
with open("/root/arch_config.yaml", "r") as file:
|
||||
config = yaml.safe_load(file)
|
||||
with open("guard_model_config.yaml") as f:
|
||||
guard_model_config = yaml.safe_load(f)
|
||||
|
||||
|
|
@ -231,6 +237,12 @@ async def zeroshot(req: ZeroShotRequest, res: Response):
|
|||
}
|
||||
|
||||
|
||||
@app.post("/v1/chat/completions")
|
||||
async def chat_completion(req: ChatMessage, res: Response):
|
||||
result = await arch_fc_chat_completion(req, res)
|
||||
return result
|
||||
|
||||
|
||||
'''
|
||||
*****
|
||||
Adding new functions to test the usecases - Sampreeth
|
||||
|
|
|
|||
|
|
@ -13,3 +13,6 @@ openvino
|
|||
psutil
|
||||
pandas
|
||||
dateparser
|
||||
openai
|
||||
pandas
|
||||
tf-keras
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue