diff --git a/arch/envoy.template.yaml b/arch/envoy.template.yaml index fb6a4f3b..76ce41be 100644 --- a/arch/envoy.template.yaml +++ b/arch/envoy.template.yaml @@ -171,7 +171,7 @@ static_resources: - endpoint: address: socket_address: - address: function_resolver + address: model_server port_value: 80 hostname: "arch_fc" {% for _, cluster in arch_clusters.items() %} diff --git a/config_generator/Dockerfile b/config_generator/Dockerfile index 378a4176..fcfd0655 100644 --- a/config_generator/Dockerfile +++ b/config_generator/Dockerfile @@ -4,6 +4,5 @@ COPY config_generator/requirements.txt . RUN pip install -r requirements.txt COPY config_generator/config_generator.py . COPY arch/envoy.template.yaml . -COPY arch/katanemo-config.yaml . CMD ["python", "config_generator.py"] diff --git a/demos/function_calling/docker-compose.yaml b/demos/function_calling/docker-compose.yaml index a2bccf9f..a4365f97 100644 --- a/demos/function_calling/docker-compose.yaml +++ b/demos/function_calling/docker-compose.yaml @@ -41,23 +41,10 @@ services: volumes: - ~/.cache/huggingface:/root/.cache/huggingface - ./arch_config.yaml:/root/arch_config.yaml - - function_resolver: - build: - context: ../../function_resolver - dockerfile: Dockerfile - ports: - - "18082:80" - healthcheck: - test: ["CMD", "curl" ,"http://localhost:80/healthz"] - interval: 5s - retries: 20 - volumes: - - ~/.cache/huggingface:/root/.cache/huggingface environment: - # use ollama endpoint that is hosted by host machine (no virtualization) - OLLAMA_ENDPOINT=${OLLAMA_ENDPOINT:-host.docker.internal} - OLLAMA_MODEL=Arch-Function-Calling-3B-Q4_K_M + # use ollama endpoint that is hosted by host machine (no virtualization) # uncomment following line to use ollama endpoint that is hosted by docker # - OLLAMA_ENDPOINT=ollama # - OLLAMA_MODEL=Arch-Function-Calling-1.5B:Q4_K_M diff --git a/function_resolver/.vscode/launch.json b/function_resolver/.vscode/launch.json deleted file mode 100644 index 0de58feb..00000000 --- a/function_resolver/.vscode/launch.json +++ /dev/null @@ -1,16 +0,0 @@ -{ - // Use IntelliSense to learn about possible attributes. - // Hover to view descriptions of existing attributes. - // For more information, visit: https://go.microsoft.com/fwlink/?linkid=830387 - "version": "0.2.0", - "configurations": [ - { - "name": "function resolver server", - "cwd": "${workspaceFolder}/app", - "type": "debugpy", - "request": "launch", - "module": "uvicorn", - "args": ["main:app","--reload", "--host", "0.0.0.0", "--port", "8001", "--log-config", "logger.yaml"], - } - ] -} diff --git a/function_resolver/Dockerfile b/function_resolver/Dockerfile deleted file mode 100644 index fc2400c8..00000000 --- a/function_resolver/Dockerfile +++ /dev/null @@ -1,30 +0,0 @@ -FROM python:3 AS base - -# -# builder -# -FROM base AS builder - -WORKDIR /src - -COPY requirements.txt /src/ -RUN pip install --prefix=/runtime --force-reinstall -r requirements.txt - -COPY . /src - -# -# output -# - -FROM python:3-slim AS output - -COPY --from=builder /runtime /usr/local - -COPY /app /app -WORKDIR /app - -RUN apt-get update && apt-get install -y \ - curl \ - && rm -rf /var/lib/apt/lists/* - -CMD ["uvicorn", "main:app", "--host", "0.0.0.0", "--port", "80", "--log-config", "logger.yaml"] diff --git a/function_resolver/requirements.txt b/function_resolver/requirements.txt deleted file mode 100644 index fc70ebb5..00000000 --- a/function_resolver/requirements.txt +++ /dev/null @@ -1,4 +0,0 @@ -fastapi -uvicorn -PyYAML -openai diff --git a/function_resolver/test/test.sh b/function_resolver/test/test.sh deleted file mode 100644 index 618b5136..00000000 --- a/function_resolver/test/test.sh +++ /dev/null @@ -1,5 +0,0 @@ -PORT="${PORT:-8001}" - -echo localhost:$PORT/v1/chat/completions - -curl -H "content-type: application/json" -XPOST localhost:$PORT/v1/chat/completions -d @test_payload.json diff --git a/function_resolver/test/test_envoy_cluster.sh b/function_resolver/test/test_envoy_cluster.sh deleted file mode 100644 index 3aea11e5..00000000 --- a/function_resolver/test/test_envoy_cluster.sh +++ /dev/null @@ -1,5 +0,0 @@ -PORT="${PORT:-8001}" - -echo localhost:$PORT/bolt_fc_1b/v1/chat/completions - -curl -v -H "content-type: application/json" -XPOST localhost:$PORT/bolt_fc_1b/v1/chat/completions -d @test_payload.json diff --git a/function_resolver/test/test_missing_param.sh b/function_resolver/test/test_missing_param.sh deleted file mode 100644 index 67d40c5c..00000000 --- a/function_resolver/test/test_missing_param.sh +++ /dev/null @@ -1 +0,0 @@ -curl -H "content-type: application/json" -XPOST localhost:8001/v1/chat/completions -d @test_payload_missing_param.json diff --git a/function_resolver/test/test_payload.json b/function_resolver/test/test_payload.json deleted file mode 100644 index 66e3d202..00000000 --- a/function_resolver/test/test_payload.json +++ /dev/null @@ -1,33 +0,0 @@ -{ - "messages": [ - { - "role": "user", - "content": "Find the area of a triangle with a base of 10 units and height of 5 units." - } - ], - "tools": [ - { - "name": "calculate_triangle_area", - "description": "Calculate the area of a triangle given its base and height.", - "parameters": { - "type": "dict", - "properties": { - "base": { - "type": "integer", - "description": "The base of the triangle.", - "required": true - }, - "height": { - "type": "integer", - "description": "The height of the triangle.", - "required": true - }, - "unit": { - "type": "string", - "description": "The unit of measure (defaults to 'units' if not specified)" - } - } - } - } - ] -} diff --git a/function_resolver/test/test_payload_missing_param.json b/function_resolver/test/test_payload_missing_param.json deleted file mode 100644 index 7f23f27c..00000000 --- a/function_resolver/test/test_payload_missing_param.json +++ /dev/null @@ -1,32 +0,0 @@ -{ - "messages": [ - { - "role": "user", - "content": "Find the area of a triangle" - } - ], - "tools": [ - { - "name": "calculate_triangle_area", - "description": "Calculate the area of a triangle given its base and height.", - "parameters": { - "type": "dict", - "properties": { - "base": { - "type": "integer", - "description": "The base of the triangle." - }, - "height": { - "type": "integer", - "description": "The height of the triangle." - }, - "unit": { - "type": "string", - "description": "The unit of measure (defaults to 'units' if not specified)" - } - }, - "required": ["base", "height"] - } - } - ] -} diff --git a/function_resolver/test/test_weather.json b/function_resolver/test/test_weather.json deleted file mode 100644 index d26e717e..00000000 --- a/function_resolver/test/test_weather.json +++ /dev/null @@ -1,29 +0,0 @@ -{ - "messages": [ - { - "role": "user", - "content": "how is the weather in San Francisco for next 5 days?" - } - ], - "tools": [ - { - "name": "weather_forecast", - "description": "This function resolver provides weather forecast information for a given city.", - "parameters": { - "type": "dict", - "properties": { - "days": { - "description": "The number of days for which the weather forecast is requested." - }, - "units": { - "description": "The units in which the weather forecast is requested." - }, - "city": { - "description": "The city for which the weather forecast is requested.", - "required": true - } - } - } - } - ] -} diff --git a/gateway.code-workspace b/gateway.code-workspace index a35227cf..f8f39f5c 100644 --- a/gateway.code-workspace +++ b/gateway.code-workspace @@ -12,10 +12,6 @@ "name": "model_server", "path": "model_server" }, - { - "name": "function_resolver", - "path": "function_resolver" - }, { "name": "chatbot_ui", "path": "chatbot_ui" diff --git a/model_server/.vscode/launch.json b/model_server/.vscode/launch.json index b6039baa..23828ad7 100644 --- a/model_server/.vscode/launch.json +++ b/model_server/.vscode/launch.json @@ -5,12 +5,11 @@ "version": "0.2.0", "configurations": [ { - "name": "embedding server", - "cwd": "${workspaceFolder}/app", + "name": "model server", "type": "debugpy", "request": "launch", "module": "uvicorn", - "args": ["main:app","--reload", "--port", "8000"], + "args": ["app.main:app","--reload", "--port", "8000"], } ] } diff --git a/model_server/Dockerfile b/model_server/Dockerfile index 48c2d57e..fef7471d 100644 --- a/model_server/Dockerfile +++ b/model_server/Dockerfile @@ -31,7 +31,7 @@ ENV NER_MODELS="urchade/gliner_large-v2.1" COPY --from=builder /runtime /usr/local -COPY /app /app +COPY ./ /app WORKDIR /app RUN apt-get update && apt-get install -y \ @@ -45,4 +45,4 @@ RUN apt-get update && apt-get install -y \ # RUN python install.py && \ # find /root/.cache/torch/sentence_transformers/ -name onnx -exec rm -rf {} + -CMD ["uvicorn", "main:app", "--host", "0.0.0.0", "--port", "80"] +CMD ["uvicorn", "app.main:app", "--host", "0.0.0.0", "--port", "80"] diff --git a/function_resolver/app/main.py b/model_server/app/arch_fc/arch_fc.py similarity index 88% rename from function_resolver/app/main.py rename to model_server/app/arch_fc/arch_fc.py index 4da5465f..2cc70851 100644 --- a/function_resolver/app/main.py +++ b/model_server/app/arch_fc/arch_fc.py @@ -1,9 +1,9 @@ import json import random from fastapi import FastAPI, Response -from arch_handler import ArchHandler -from bolt_handler import BoltHandler -from common import ChatMessage +from app.arch_fc.arch_handler import ArchHandler +from app.arch_fc.bolt_handler import BoltHandler +from app.arch_fc.common import ChatMessage import logging from openai import OpenAI import os @@ -21,7 +21,7 @@ else: logger.info(f"using model: {ollama_model}") logger.info(f"using ollama endpoint: {ollama_endpoint}") -app = FastAPI() +# app = FastAPI() client = OpenAI( base_url='http://{}:11434/v1/'.format(ollama_endpoint), @@ -30,14 +30,7 @@ client = OpenAI( api_key='ollama', ) -@app.get("/healthz") -async def healthz(): - return { - "status": "ok" - } - -@app.post("/v1/chat/completions") async def chat_completion(req: ChatMessage, res: Response): logger.info("starting request") tools_encoded = handler._format_system(req.tools) diff --git a/function_resolver/app/arch_handler.py b/model_server/app/arch_fc/arch_handler.py similarity index 100% rename from function_resolver/app/arch_handler.py rename to model_server/app/arch_fc/arch_handler.py diff --git a/function_resolver/app/bolt_handler.py b/model_server/app/arch_fc/bolt_handler.py similarity index 100% rename from function_resolver/app/bolt_handler.py rename to model_server/app/arch_fc/bolt_handler.py diff --git a/function_resolver/app/common.py b/model_server/app/arch_fc/common.py similarity index 100% rename from function_resolver/app/common.py rename to model_server/app/arch_fc/common.py diff --git a/function_resolver/app/logger.yaml b/model_server/app/arch_fc/logger.yaml similarity index 100% rename from function_resolver/app/logger.yaml rename to model_server/app/arch_fc/logger.yaml diff --git a/model_server/app/load_models.py b/model_server/app/load_models.py index 2c715d67..c7dd939e 100644 --- a/model_server/app/load_models.py +++ b/model_server/app/load_models.py @@ -3,8 +3,8 @@ import sentence_transformers from gliner import GLiNER from transformers import AutoTokenizer, pipeline import sqlite3 -from employee_data_generator import generate_employee_data -from network_data_generator import ( +from app.employee_data_generator import generate_employee_data +from app.network_data_generator import ( generate_device_data, generate_interface_stats_data, generate_flow_data, diff --git a/model_server/app/main.py b/model_server/app/main.py index 66e9de43..accc379d 100644 --- a/model_server/app/main.py +++ b/model_server/app/main.py @@ -1,17 +1,20 @@ +import os from fastapi import FastAPI, Response, HTTPException from pydantic import BaseModel -from load_models import ( +from app.load_models import ( load_ner_models, load_transformers, load_guard_model, load_zero_shot_models, ) -from utils import GuardHandler, split_text_into_chunks +from app.utils import GuardHandler, split_text_into_chunks import torch import yaml import string import time import logging +from app.arch_fc.arch_fc import chat_completion as arch_fc_chat_completion, ChatMessage +import os.path logging.basicConfig( level=logging.INFO, format="%(asctime)s - %(levelname)s - %(message)s" @@ -22,8 +25,11 @@ transformers = load_transformers() ner_models = load_ner_models() zero_shot_models = load_zero_shot_models() -with open("/root/arch_config.yaml", "r") as file: - config = yaml.safe_load(file) +config = {} + +if os.path.exists("/root/arch_config.yaml"): + with open("/root/arch_config.yaml", "r") as file: + config = yaml.safe_load(file) with open("guard_model_config.yaml") as f: guard_model_config = yaml.safe_load(f) @@ -231,6 +237,12 @@ async def zeroshot(req: ZeroShotRequest, res: Response): } +@app.post("/v1/chat/completions") +async def chat_completion(req: ChatMessage, res: Response): + result = await arch_fc_chat_completion(req, res) + return result + + ''' ***** Adding new functions to test the usecases - Sampreeth diff --git a/model_server/app/guard_model_config.yaml b/model_server/guard_model_config.yaml similarity index 100% rename from model_server/app/guard_model_config.yaml rename to model_server/guard_model_config.yaml diff --git a/model_server/requirements.txt b/model_server/requirements.txt index ef39a36c..1320d843 100644 --- a/model_server/requirements.txt +++ b/model_server/requirements.txt @@ -13,3 +13,6 @@ openvino psutil pandas dateparser +openai +pandas +tf-keras