mirror of
https://github.com/katanemo/plano.git
synced 2026-04-27 09:46:28 +02:00
Fold function_resolver into model_server (#103)
This commit is contained in:
parent
b0ce5eca93
commit
f4395d39f9
24 changed files with 31 additions and 197 deletions
|
|
@ -1,67 +0,0 @@
|
|||
import json
|
||||
import random
|
||||
from fastapi import FastAPI, Response
|
||||
from arch_handler import ArchHandler
|
||||
from bolt_handler import BoltHandler
|
||||
from common import ChatMessage
|
||||
import logging
|
||||
from openai import OpenAI
|
||||
import os
|
||||
|
||||
# Runtime configuration, read once from the environment at import time.
ollama_endpoint = os.getenv("OLLAMA_ENDPOINT", "localhost")
ollama_model = os.getenv("OLLAMA_MODEL", "Arch-Function-Calling-1.5B-Q4_K_M")
logger = logging.getLogger("uvicorn.error")

# Model names starting with "Arch" are served by ArchHandler; every other
# model name falls back to BoltHandler.
handler = ArchHandler() if ollama_model.startswith("Arch") else BoltHandler()

logger.info(f"using model: {ollama_model}")
logger.info(f"using ollama endpoint: {ollama_endpoint}")

app = FastAPI()

# OpenAI-compatible client pointed at the configured host on port 11434.
client = OpenAI(
    # required but ignored
    api_key="ollama",
    base_url="http://{}:11434/v1/".format(ollama_endpoint),
)
|
||||
|
||||
@app.get("/healthz")
async def healthz():
    """Return a constant ``{"status": "ok"}`` payload for health probes."""
    payload = {"status": "ok"}
    return payload
|
||||
|
||||
|
||||
@app.post("/v1/chat/completions")
async def chat_completion(req: ChatMessage, res: Response):
    """Run a chat completion on the configured model and attach tool calls.

    The request's tools are rendered into a system prompt by the active
    handler, prepended to the caller's messages, and sent to the model.
    The model's text reply is then parsed by the handler; every tool it
    extracts is exposed as an entry in ``message.tool_calls`` on the first
    choice, and that choice's ``content`` is cleared.

    Args:
        req: Incoming request; this function reads ``req.tools`` and the
            ``role`` / ``content`` of each item in ``req.messages``.
        res: Response object injected by FastAPI; not used here.

    Returns:
        The completion object returned by the client, mutated in place when
        tools were extracted.
    """
    # Function-scope import so this handler does not depend on a change to
    # the module's import block.
    from fastapi.concurrency import run_in_threadpool

    logger.info("starting request")
    tools_encoded = handler._format_system(req.tools)

    # append system prompt with tools to messages
    messages = [{"role": "system", "content": tools_encoded}]
    messages.extend(
        {"role": message.role, "content": message.content}
        for message in req.messages
    )
    logger.info(f"request model: {ollama_model}, messages: {json.dumps(messages)}")

    # `client` is the synchronous OpenAI client. Calling it directly inside
    # this coroutine would block the event loop (and every other request,
    # including /healthz) until the model finishes, so run it in a worker
    # thread instead.
    resp = await run_in_threadpool(
        client.chat.completions.create,
        messages=messages,
        model=ollama_model,
        stream=False,
    )

    tools = handler.extract_tools(resp.choices[0].message.content)
    # Each extracted tool is iterated as a mapping of tool name -> arguments.
    tool_calls = [
        {
            "id": f"call_{random.randint(1000, 10000)}",
            "type": "function",
            "function": {
                "name": tool_name,
                "arguments": tool_args,
            },
        }
        for tool in tools
        for tool_name, tool_args in tool.items()
    ]

    if tools:
        # Replace the raw text reply with the structured tool calls.
        first_message = resp.choices[0].message
        first_message.tool_calls = tool_calls
        first_message.content = None
        logger.info(f"response (tools): {json.dumps(tools)}")

    logger.info(f"response: {json.dumps(resp.to_dict())}")
    return resp
|
||||
Loading…
Add table
Add a link
Reference in a new issue