Fold function_resolver into model_server (#103)

This commit is contained in:
Adil Hafeez 2024-10-01 09:13:50 -07:00 committed by GitHub
parent b0ce5eca93
commit f4395d39f9
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
24 changed files with 31 additions and 197 deletions

View file

@@ -1,67 +0,0 @@
import asyncio
import functools
import json
import logging
import os
import random
import uuid

from fastapi import FastAPI, Response
from openai import OpenAI

from arch_handler import ArchHandler
from bolt_handler import BoltHandler
from common import ChatMessage
# Module-level wiring, evaluated once at import time: logger, environment
# configuration, prompt handler, FastAPI app and the chat-completions client.
logger = logging.getLogger('uvicorn.error')

# Both settings fall back to the defaults below when the variable is unset.
ollama_endpoint = os.getenv("OLLAMA_ENDPOINT", "localhost")
ollama_model = os.getenv("OLLAMA_MODEL", "Arch-Function-Calling-1.5B-Q4_K_M")

# Model names starting with "Arch" get ArchHandler; anything else gets
# BoltHandler.
handler = ArchHandler() if ollama_model.startswith("Arch") else BoltHandler()

logger.info(f"using model: {ollama_model}")
logger.info(f"using ollama endpoint: {ollama_endpoint}")

app = FastAPI()

# OpenAI client pointed at port 11434 of the configured endpoint.
client = OpenAI(
    base_url='http://{}:11434/v1/'.format(ollama_endpoint),
    # required but ignored
    api_key='ollama',
)
@app.get("/healthz")
async def healthz():
    """Health-check endpoint; always answers with a fixed ``status: ok`` body."""
    return dict(status="ok")
def _build_tool_calls(tools):
    """Turn extracted tools into a list of function tool-call entries.

    Each item of *tools* is a mapping of tool name to that tool's arguments;
    every (name, arguments) pair becomes one entry of type ``"function"``.
    """
    return [
        {
            # uuid4 avoids the id collisions a small random integer range
            # could produce between calls in the same response.
            "id": f"call_{uuid.uuid4().hex}",
            "type": "function",
            "function": {
                "name": tool_name,
                "arguments": tool_args,
            },
        }
        for tool in tools
        for tool_name, tool_args in tool.items()
    ]


@app.post("/v1/chat/completions")
async def chat_completion(req: ChatMessage, res: Response):
    """Run a chat completion and surface any tool calls found in the reply.

    The tools from the request are formatted by the active handler into a
    system message, which is placed ahead of the request's own messages. The
    combined list is sent to the configured model. The handler then extracts
    tools from the reply text; when any are found, they are attached to the
    first choice as ``tool_calls`` and that choice's text content is cleared.

    Args:
        req: Incoming request; ``req.tools`` and ``req.messages`` are read.
        res: Response object supplied by the framework (not used here).

    Returns:
        The completion response object, possibly modified as described above.
    """
    logger.info("starting request")
    tools_encoded = handler._format_system(req.tools)
    # append system prompt with tools to messages
    messages = [{"role": "system", "content": tools_encoded}]
    messages.extend(
        {"role": message.role, "content": message.content}
        for message in req.messages
    )
    logger.info(f"request model: {ollama_model}, messages: {json.dumps(messages)}")

    # The client call is synchronous; running it directly in this coroutine
    # would stall the event loop (and every other request, /healthz included)
    # until the model answers, so it is pushed to the default executor.
    loop = asyncio.get_running_loop()
    resp = await loop.run_in_executor(
        None,
        functools.partial(
            client.chat.completions.create,
            messages=messages,
            model=ollama_model,
            stream=False,
        ),
    )

    tools = handler.extract_tools(resp.choices[0].message.content)
    if tools:
        # Tool calls replace the textual content of the first choice.
        resp.choices[0].message.tool_calls = _build_tool_calls(tools)
        resp.choices[0].message.content = None
    logger.info(f"response (tools): {json.dumps(tools)}")
    logger.info(f"response: {json.dumps(resp.to_dict())}")
    return resp