Fold function_resolver into model_server (#103)

This commit is contained in:
Adil Hafeez 2024-10-01 09:13:50 -07:00 committed by GitHub
parent b0ce5eca93
commit f4395d39f9
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
24 changed files with 31 additions and 197 deletions

View file

@@ -171,7 +171,7 @@ static_resources:
- endpoint:
address:
socket_address:
address: function_resolver
address: model_server
port_value: 80
hostname: "arch_fc"
{% for _, cluster in arch_clusters.items() %}

View file

@@ -4,6 +4,5 @@ COPY config_generator/requirements.txt .
RUN pip install -r requirements.txt
COPY config_generator/config_generator.py .
COPY arch/envoy.template.yaml .
COPY arch/katanemo-config.yaml .
CMD ["python", "config_generator.py"]

View file

@@ -41,23 +41,10 @@ services:
volumes:
- ~/.cache/huggingface:/root/.cache/huggingface
- ./arch_config.yaml:/root/arch_config.yaml
function_resolver:
build:
context: ../../function_resolver
dockerfile: Dockerfile
ports:
- "18082:80"
healthcheck:
test: ["CMD", "curl" ,"http://localhost:80/healthz"]
interval: 5s
retries: 20
volumes:
- ~/.cache/huggingface:/root/.cache/huggingface
environment:
# use ollama endpoint that is hosted by host machine (no virtualization)
- OLLAMA_ENDPOINT=${OLLAMA_ENDPOINT:-host.docker.internal}
- OLLAMA_MODEL=Arch-Function-Calling-3B-Q4_K_M
# use ollama endpoint that is hosted by host machine (no virtualization)
# uncomment following line to use ollama endpoint that is hosted by docker
# - OLLAMA_ENDPOINT=ollama
# - OLLAMA_MODEL=Arch-Function-Calling-1.5B:Q4_K_M

View file

@@ -1,16 +0,0 @@
{
// Use IntelliSense to learn about possible attributes.
// Hover to view descriptions of existing attributes.
// For more information, visit: https://go.microsoft.com/fwlink/?linkid=830387
"version": "0.2.0",
"configurations": [
{
"name": "function resolver server",
"cwd": "${workspaceFolder}/app",
"type": "debugpy",
"request": "launch",
"module": "uvicorn",
"args": ["main:app","--reload", "--host", "0.0.0.0", "--port", "8001", "--log-config", "logger.yaml"],
}
]
}

View file

@@ -1,30 +0,0 @@
FROM python:3 AS base
#
# builder
#
FROM base AS builder
WORKDIR /src
COPY requirements.txt /src/
RUN pip install --prefix=/runtime --force-reinstall -r requirements.txt
COPY . /src
#
# output
#
FROM python:3-slim AS output
COPY --from=builder /runtime /usr/local
COPY /app /app
WORKDIR /app
RUN apt-get update && apt-get install -y \
curl \
&& rm -rf /var/lib/apt/lists/*
CMD ["uvicorn", "main:app", "--host", "0.0.0.0", "--port", "80", "--log-config", "logger.yaml"]

View file

@@ -1,4 +0,0 @@
fastapi
uvicorn
PyYAML
openai

View file

@@ -1,5 +0,0 @@
PORT="${PORT:-8001}"
echo localhost:$PORT/v1/chat/completions
curl -H "content-type: application/json" -XPOST localhost:$PORT/v1/chat/completions -d @test_payload.json

View file

@@ -1,5 +0,0 @@
PORT="${PORT:-8001}"
echo localhost:$PORT/bolt_fc_1b/v1/chat/completions
curl -v -H "content-type: application/json" -XPOST localhost:$PORT/bolt_fc_1b/v1/chat/completions -d @test_payload.json

View file

@@ -1 +0,0 @@
curl -H "content-type: application/json" -XPOST localhost:8001/v1/chat/completions -d @test_payload_missing_param.json

View file

@@ -1,33 +0,0 @@
{
"messages": [
{
"role": "user",
"content": "Find the area of a triangle with a base of 10 units and height of 5 units."
}
],
"tools": [
{
"name": "calculate_triangle_area",
"description": "Calculate the area of a triangle given its base and height.",
"parameters": {
"type": "dict",
"properties": {
"base": {
"type": "integer",
"description": "The base of the triangle.",
"required": true
},
"height": {
"type": "integer",
"description": "The height of the triangle.",
"required": true
},
"unit": {
"type": "string",
"description": "The unit of measure (defaults to 'units' if not specified)"
}
}
}
}
]
}

View file

@@ -1,32 +0,0 @@
{
"messages": [
{
"role": "user",
"content": "Find the area of a triangle"
}
],
"tools": [
{
"name": "calculate_triangle_area",
"description": "Calculate the area of a triangle given its base and height.",
"parameters": {
"type": "dict",
"properties": {
"base": {
"type": "integer",
"description": "The base of the triangle."
},
"height": {
"type": "integer",
"description": "The height of the triangle."
},
"unit": {
"type": "string",
"description": "The unit of measure (defaults to 'units' if not specified)"
}
},
"required": ["base", "height"]
}
}
]
}

View file

@@ -1,29 +0,0 @@
{
"messages": [
{
"role": "user",
"content": "how is the weather in San Francisco for next 5 days?"
}
],
"tools": [
{
"name": "weather_forecast",
"description": "This function resolver provides weather forecast information for a given city.",
"parameters": {
"type": "dict",
"properties": {
"days": {
"description": "The number of days for which the weather forecast is requested."
},
"units": {
"description": "The units in which the weather forecast is requested."
},
"city": {
"description": "The city for which the weather forecast is requested.",
"required": true
}
}
}
}
]
}

View file

@@ -12,10 +12,6 @@
"name": "model_server",
"path": "model_server"
},
{
"name": "function_resolver",
"path": "function_resolver"
},
{
"name": "chatbot_ui",
"path": "chatbot_ui"

View file

@@ -5,12 +5,11 @@
"version": "0.2.0",
"configurations": [
{
"name": "embedding server",
"cwd": "${workspaceFolder}/app",
"name": "model server",
"type": "debugpy",
"request": "launch",
"module": "uvicorn",
"args": ["main:app","--reload", "--port", "8000"],
"args": ["app.main:app","--reload", "--port", "8000"],
}
]
}

View file

@@ -31,7 +31,7 @@ ENV NER_MODELS="urchade/gliner_large-v2.1"
COPY --from=builder /runtime /usr/local
COPY /app /app
COPY ./ /app
WORKDIR /app
RUN apt-get update && apt-get install -y \
@@ -45,4 +45,4 @@ RUN apt-get update && apt-get install -y \
# RUN python install.py && \
# find /root/.cache/torch/sentence_transformers/ -name onnx -exec rm -rf {} +
CMD ["uvicorn", "main:app", "--host", "0.0.0.0", "--port", "80"]
CMD ["uvicorn", "app.main:app", "--host", "0.0.0.0", "--port", "80"]

View file

@@ -1,9 +1,9 @@
import json
import random
from fastapi import FastAPI, Response
from arch_handler import ArchHandler
from bolt_handler import BoltHandler
from common import ChatMessage
from app.arch_fc.arch_handler import ArchHandler
from app.arch_fc.bolt_handler import BoltHandler
from app.arch_fc.common import ChatMessage
import logging
from openai import OpenAI
import os
@@ -21,7 +21,7 @@ else:
logger.info(f"using model: {ollama_model}")
logger.info(f"using ollama endpoint: {ollama_endpoint}")
app = FastAPI()
# app = FastAPI()
client = OpenAI(
base_url='http://{}:11434/v1/'.format(ollama_endpoint),
@@ -30,14 +30,7 @@ client = OpenAI(
api_key='ollama',
)
@app.get("/healthz")
async def healthz():
return {
"status": "ok"
}
@app.post("/v1/chat/completions")
async def chat_completion(req: ChatMessage, res: Response):
logger.info("starting request")
tools_encoded = handler._format_system(req.tools)

View file

@@ -3,8 +3,8 @@ import sentence_transformers
from gliner import GLiNER
from transformers import AutoTokenizer, pipeline
import sqlite3
from employee_data_generator import generate_employee_data
from network_data_generator import (
from app.employee_data_generator import generate_employee_data
from app.network_data_generator import (
generate_device_data,
generate_interface_stats_data,
generate_flow_data,

View file

@@ -1,17 +1,20 @@
import os
from fastapi import FastAPI, Response, HTTPException
from pydantic import BaseModel
from load_models import (
from app.load_models import (
load_ner_models,
load_transformers,
load_guard_model,
load_zero_shot_models,
)
from utils import GuardHandler, split_text_into_chunks
from app.utils import GuardHandler, split_text_into_chunks
import torch
import yaml
import string
import time
import logging
from app.arch_fc.arch_fc import chat_completion as arch_fc_chat_completion, ChatMessage
import os.path
logging.basicConfig(
level=logging.INFO, format="%(asctime)s - %(levelname)s - %(message)s"
@@ -22,8 +25,11 @@ transformers = load_transformers()
ner_models = load_ner_models()
zero_shot_models = load_zero_shot_models()
with open("/root/arch_config.yaml", "r") as file:
config = yaml.safe_load(file)
config = {}
if os.path.exists("/root/arch_config.yaml"):
with open("/root/arch_config.yaml", "r") as file:
config = yaml.safe_load(file)
with open("guard_model_config.yaml") as f:
guard_model_config = yaml.safe_load(f)
@@ -231,6 +237,12 @@ async def zeroshot(req: ZeroShotRequest, res: Response):
}
@app.post("/v1/chat/completions")
async def chat_completion(req: ChatMessage, res: Response):
result = await arch_fc_chat_completion(req, res)
return result
'''
*****
Adding new functions to test the usecases - Sampreeth

View file

@@ -13,3 +13,6 @@ openvino
psutil
pandas
dateparser
openai
pandas
tf-keras