mirror of
https://github.com/katanemo/plano.git
synced 2026-06-17 15:25:17 +02:00
address cmt
This commit is contained in:
parent
dd07ba2cd0
commit
1f383eafc4
4 changed files with 27 additions and 23 deletions
|
|
@ -8,8 +8,9 @@ from app.prompt_guard.model_handler import ArchGuardHanlder
|
|||
logger = utils.get_model_server_logger()
|
||||
|
||||
arch_function_hanlder = ArchFunctionHandler()
|
||||
prefill_list = ["May", "Could", "Sure", "Definitely", "Certainly", "Of course", "Can"]
|
||||
prefill_enabled = True
|
||||
PREFILL_LIST = ["May", "Could", "Sure", "Definitely", "Certainly", "Of course", "Can"]
|
||||
PREFILL_ENABLED = True
|
||||
TOOL_CALL_TOKEN = "<tool_call>"
|
||||
arch_function_endpoint = "https://api.fc.archgw.com/v1"
|
||||
arch_function_client = utils.get_client(arch_function_endpoint)
|
||||
arch_function_generation_params = {
|
||||
|
|
|
|||
|
|
@ -87,14 +87,14 @@ async def chat_completion(req: ChatMessage, res: Response):
|
|||
resp = const.arch_function_client.chat.completions.create(
|
||||
messages=messages,
|
||||
model=client_model_name,
|
||||
stream=const.prefill_enabled,
|
||||
stream=const.PREFILL_ENABLED,
|
||||
extra_body=const.arch_function_generation_params,
|
||||
)
|
||||
except Exception as e:
|
||||
logger.error(f"model_server <= arch_function: error: {e}")
|
||||
raise
|
||||
|
||||
if const.prefill_enabled:
|
||||
if const.PREFILL_ENABLED:
|
||||
first_token_content = ""
|
||||
for token in resp:
|
||||
first_token_content = token.choices[
|
||||
|
|
@ -104,14 +104,16 @@ async def chat_completion(req: ChatMessage, res: Response):
|
|||
break
|
||||
|
||||
# Check if the first token requires tool call handling
|
||||
if first_token_content != "<tool_call>":
|
||||
if first_token_content != const.TOOL_CALL_TOKEN:
|
||||
# Engage pre-filling response if no tool call is indicated
|
||||
resp.close()
|
||||
logger.info("Tool call is not found! Engage pre filling")
|
||||
prefill_content = random.choice(const.prefill_list)
|
||||
prefill_content = random.choice(const.PREFILL_LIST)
|
||||
messages.append({"role": "assistant", "content": prefill_content})
|
||||
|
||||
# Send a new completion request with the updated messages
|
||||
# The model will continue the final message in the chat instead of starting a new one.
|
||||
# Disable add_generation_prompt, which tells the template to add tokens that indicate the start of a bot response.
|
||||
extra_body = {
|
||||
**const.arch_function_generation_params,
|
||||
"continue_final_message": True,
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue