mirror of
https://github.com/katanemo/plano.git
synced 2026-05-21 13:55:15 +02:00
don't compute embeddings for names and other fixes see description (#126)
* serialize tools - 2 * fix int tests * fix int test * fix unit tests
This commit is contained in:
parent
0e5ea3d6db
commit
2a747df7c0
16 changed files with 125 additions and 86 deletions
|
|
@ -3,11 +3,12 @@ import random
|
|||
from fastapi import FastAPI, Response
|
||||
from app.arch_fc.arch_handler import ArchHandler
|
||||
from app.arch_fc.bolt_handler import BoltHandler
|
||||
from app.arch_fc.common import ChatMessage
|
||||
from app.arch_fc.common import ChatMessage, Message
|
||||
import logging
|
||||
import yaml
|
||||
from openai import OpenAI
|
||||
import os
|
||||
import hashlib
|
||||
|
||||
logging.basicConfig(
|
||||
level=logging.INFO, format="%(asctime)s - %(levelname)s - %(message)s"
|
||||
|
|
@ -51,14 +52,54 @@ logger.info(f"serving mode: {mode}")
|
|||
logger.info(f"using model: {chosen_model}")
|
||||
logger.info(f"using endpoint: {endpoint}")
|
||||
|
||||
def process_state(arch_state, history: "list[Message]") -> list[dict[str, str]]:
    """Rebuild the chat history, re-inserting tool calls recorded in ``arch_state``.

    ``arch_state`` is a JSON document: a list of lists of tool-state objects.
    Each object carries a ``"key"`` and, optionally, ``"tool_call"`` and
    ``"tool_response"`` entries.  The key is compared against the SHA-256 hex
    digest of the JSON-encoded list of all *user* message contents seen so
    far (up to and including the current user message).  When a user message
    matches a key, the recorded tool call and tool response are appended
    right after that message.

    Args:
        arch_state: JSON text describing the saved tool state.  ``None`` or
            an empty string is treated as "no state".
        history: Messages exposing ``role`` and ``content`` attributes.

    Returns:
        A new list of ``{"role": ..., "content": ...}`` dicts: the original
        history plus any tool call / tool response messages restored from
        the state.

    Raises:
        json.JSONDecodeError: If ``arch_state`` is non-empty but not valid JSON.
        KeyError: If a tool-state object has no ``"key"`` entry.
    """
    # Resolved locally so this helper does not depend on module-level globals;
    # records still flow through the logging configuration set up by the module.
    log = logging.getLogger(__name__)
    log.info("state: %s", arch_state)

    # A missing/blank state must not crash the request; treat it as empty.
    state_json = json.loads(arch_state) if arch_state else []

    # Flatten the nested lists into a lookup keyed by conversation hash.
    state_map = {
        tool_state["key"]: tool_state
        for tools_state in state_json or []
        for tool_state in tools_state
    }
    log.info("state_map: %s", json.dumps(state_map))

    user_contents = []
    updated_history = []
    for hist in history:
        updated_history.append({"role": hist.role, "content": hist.content})
        if hist.role != "user":
            continue

        # The key identifies the conversation prefix by its user turns only.
        user_contents.append(hist.content)
        sha_key = hashlib.sha256(json.dumps(user_contents).encode()).hexdigest()
        log.info("sha_key: %s", sha_key)

        # Pop rather than read: we don't want to match this state with any
        # other message later in the history.
        tool_call_state = state_map.pop(sha_key, None)
        if tool_call_state is None:
            continue

        if "tool_call" in tool_call_state:
            tool_call_str = json.dumps(tool_call_state["tool_call"])
            updated_history.append(
                {
                    "role": "assistant",
                    "content": f"<tool_call>\n{tool_call_str}\n</tool_call>",
                }
            )
        if "tool_response" in tool_call_state:
            tool_resp = tool_call_state["tool_response"]
            # NOTE(review): the previous TODO here read "try with role = user
            # as well", but "user" is already the role being sent; presumably
            # a different role was the intended alternative -- confirm.
            updated_history.append(
                {
                    "role": "user",
                    "content": f"<tool_response>\n{tool_resp}\n</tool_response>",
                }
            )

    return updated_history
|
||||
|
||||
async def chat_completion(req: ChatMessage, res: Response):
|
||||
logger.info("starting request")
|
||||
tools_encoded = handler._format_system(req.tools)
|
||||
# append system prompt with tools to messages
|
||||
messages = [{"role": "system", "content": tools_encoded}]
|
||||
for message in req.messages:
|
||||
messages.append({"role": message.role, "content": message.content})
|
||||
logger.info(f"request model: {chosen_model}, messages: {json.dumps(messages)}")
|
||||
metadata = req.metadata
|
||||
arch_state = metadata.get("x-arch-state", "[]")
|
||||
updated_history = process_state(arch_state, req.messages)
|
||||
for message in updated_history:
|
||||
messages.append({"role": message["role"], "content": message["content"]})
|
||||
|
||||
logger.info(f"model_server => arch_fc: {chosen_model}, messages: {json.dumps(messages)}")
|
||||
completions_params = params["params"]
|
||||
resp = client.chat.completions.create(
|
||||
messages=messages,
|
||||
|
|
@ -80,6 +121,6 @@ async def chat_completion(req: ChatMessage, res: Response):
|
|||
if tools:
|
||||
resp.choices[0].message.tool_calls = tool_calls
|
||||
resp.choices[0].message.content = None
|
||||
logger.info(f"response (tools): {json.dumps(tools)}")
|
||||
logger.info(f"response: {json.dumps(resp.to_dict())}")
|
||||
logger.info(f"model_server <= arch_fc: (tools): {json.dumps(tools)}")
|
||||
logger.info(f"model_server <= arch_fc: response body: {json.dumps(resp.to_dict())}")
|
||||
return resp
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue