# plano/model_server/app/function_calling/model_utils.py

import json
import hashlib
import app.commons.constants as const
import random
from fastapi import Response
from pydantic import BaseModel
from app.commons.utilities import get_model_server_logger
from typing import Any, Dict, List, Optional


logger = get_model_server_logger()


class Message(BaseModel):
    """One chat message, used for incoming history and for the generated reply.

    Every field is optional and defaults to an empty value.
    """

    # Speaker role; process_messages rewrites "tool" messages as "user" turns.
    role: Optional[str] = ""
    # Text body of the message.
    content: Optional[str] = ""
    # Tool calls attached to the message; code in this file reads each
    # entry's "function" key.
    tool_calls: Optional[List[Dict[str, Any]]] = []
    # Not read anywhere in this file; presumably links a tool response to the
    # call it answers -- TODO confirm against callers.
    tool_call_id: Optional[str] = ""


class ChatMessage(BaseModel):
    """Request payload accepted by chat_completion."""

    # Conversation history; converted by process_messages before being sent.
    messages: list[Message]
    # Tool definitions; chat_completion passes them to the function handler's
    # _format_system to build the system prompt.
    tools: List[Dict[str, Any]]


class Choice(BaseModel):
    """A single completion choice wrapping the generated message."""

    # The generated message (plain content or tool calls).
    message: Message
    # chat_completion never overrides this, so responses built in this file
    # always carry the default "stop".
    finish_reason: Optional[str] = "stop"
    # Position of the choice; chat_completion only ever builds one choice.
    index: Optional[int] = 0


class ChatCompletionResponse(BaseModel):
    """Response body returned by chat_completion."""

    # Generated choices; chat_completion returns exactly one.
    choices: List[Choice]
    # Model name; chat_completion replaces the default with the id reported by
    # the function-calling client.
    model: Optional[str] = "Arch-Function"
    # Left at the empty-string default by the code in this file.
    created: Optional[str] = ""
    # Left at the empty-string default by the code in this file.
    id: Optional[str] = ""
    # Object type tag included in the serialized response.
    object: Optional[str] = "chat_completion"


def process_messages(history: list[Message]):
    """Flatten chat history into plain ``{"role", "content"}`` dicts.

    Conversion rules, applied to each message in order:

    * A message carrying a tool call becomes an ``assistant`` turn whose
      content is the JSON-encoded ``"function"`` entry of that call wrapped in
      ``<tool_call>`` tags.
    * A message with role ``"tool"`` becomes a ``user`` turn whose content is
      wrapped in ``<tool_response>`` tags.
    * Any other message keeps its role and content unchanged.

    Args:
        history: Messages to convert.

    Returns:
        A new list of dicts, one per input message, in the same order.

    Raises:
        ValueError: If a message carries more than one tool call.
    """
    converted = []
    for entry in history:
        calls = entry.tool_calls

        # Messages without tool calls: either a tool response or a plain turn.
        if not calls:
            if entry.role == "tool":
                role = "user"
                content = f"<tool_response>\n{entry.content}\n</tool_response>"
            else:
                role, content = entry.role, entry.content
            converted.append({"role": role, "content": content})
            continue

        # Only a single tool call per message can be represented.
        if len(calls) > 1:
            error_msg = f"Only one tool call is supported, tools counts: {len(calls)}"
            logger.error(error_msg)
            raise ValueError(error_msg)

        serialized_call = json.dumps(calls[0]["function"])
        converted.append(
            {
                "role": "assistant",
                "content": f"<tool_call>\n{serialized_call}\n</tool_call>",
            }
        )
    return converted


def _delta_content(chunk) -> str:
    """Return the text carried by one streamed chunk, or "" when it has none.

    A chunk with no choices, no delta, or a ``None`` delta content (the
    streaming schema types the content as optional) yields an empty string, so
    callers can strip or concatenate the result without extra checks.
    """
    choices = getattr(chunk, "choices", None)
    if not choices:
        return ""
    delta = getattr(choices[0], "delta", None)
    return getattr(delta, "content", None) or ""


async def chat_completion(req: ChatMessage, res: Response):
    """Run a function-calling completion and wrap the result in a response.

    The tool definitions are rendered into a system prompt, the history is
    converted by ``process_messages``, and the conversation is sent to the
    function-calling client.

    When ``const.PREFILL_ENABLED`` is true the request is streamed and the
    first non-empty token is inspected:

    * If it equals ``const.TOOL_CALL_TOKEN``, the rest of the stream is
      collected as the full response.
    * Otherwise the stream is closed, a random entry of ``const.PREFILL_LIST``
      is appended as an assistant message, and a second, non-streaming request
      asks the model to continue that final message.

    When prefill is disabled the single non-streaming response is used as is.

    Args:
        req: Conversation history and tool definitions.
        res: FastAPI response object; not used by this function.

    Returns:
        A ``ChatCompletionResponse`` with one choice. The message holds the
        extracted tool calls (with empty content) when any were found,
        otherwise the raw model output as content.

    Raises:
        Exception: Any error raised by the client is logged and re-raised.
        ValueError: Propagated from ``process_messages``.
    """
    logger.info("starting request")

    tools_encoded = const.arch_function_hanlder._format_system(req.tools)

    messages = [{"role": "system", "content": tools_encoded}]
    messages.extend(
        {"role": message["role"], "content": message["content"]}
        for message in process_messages(req.messages)
    )

    client_model_name = const.arch_function_client.models.list().data[0].id

    logger.info(
        f"model_server => arch_function: {client_model_name}, messages: {json.dumps(messages)}"
    )

    try:
        resp = const.arch_function_client.chat.completions.create(
            messages=messages,
            model=client_model_name,
            stream=const.PREFILL_ENABLED,
            extra_body=const.arch_function_generation_params,
        )
    except Exception as e:
        logger.error(f"model_server <= arch_function: error: {e}")
        raise

    if const.PREFILL_ENABLED:
        # Read the stream until the first non-empty token. Chunks without
        # text (None content, empty choices) are skipped instead of crashing.
        first_token_content = ""
        for token in resp:
            first_token_content = _delta_content(token).strip()
            if first_token_content:
                break

        # Check if the first token requires tool call handling
        if first_token_content != const.TOOL_CALL_TOKEN:
            # Engage pre-filling response if no tool call is indicated
            resp.close()
            logger.info("Tool call is not found! Engage pre filling")
            prefill_content = random.choice(const.PREFILL_LIST)
            messages.append({"role": "assistant", "content": prefill_content})

            # Send a new completion request with the updated messages:
            # the model continues the final message in the chat instead of
            # starting a new one, and add_generation_prompt is disabled so the
            # template does not add tokens that start a new bot response.
            extra_body = {
                **const.arch_function_generation_params,
                "continue_final_message": True,
                "add_generation_prompt": False,
            }
            try:
                pre_fill_resp = const.arch_function_client.chat.completions.create(
                    messages=messages,
                    model=client_model_name,
                    stream=False,
                    extra_body=extra_body,
                )
            except Exception as e:
                # Log with the same format as the first request, then re-raise.
                logger.error(f"model_server <= arch_function: error: {e}")
                raise
            full_response = pre_fill_resp.choices[0].message.content
        else:
            # Gather the remainder of the stream. Content may be None on some
            # chunks, so each piece is normalised before joining.
            full_response = first_token_content + "".join(
                _delta_content(token) for token in resp
            )
    else:
        logger.info("Stream is disabled, not engaging pre-filling")
        full_response = resp.choices[0].message.content

    tool_calls = const.arch_function_hanlder.extract_tool_calls(full_response)

    if tool_calls:
        message = Message(content="", tool_calls=tool_calls)
    else:
        message = Message(content=full_response, tool_calls=[])
    choice = Choice(message=message)
    chat_completion_response = ChatCompletionResponse(
        choices=[choice], model=client_model_name
    )

    logger.info(
        f"model_server <= arch_function: (tools): {json.dumps([tool_call['function'] for tool_call in tool_calls])}"
    )
    logger.info(
        f"model_server <= arch_function: response body: {json.dumps(chat_completion_response.dict())}"
    )

    return chat_completion_response
send history to bolt fc model (#84) 2024-09-25 12:03:44 -07:00			`import json`
don't compute embeddings for names and other fixes see description (#126) * serialize tools - 2 * fix int tests * fix int test * fix unit tests 2024-10-05 19:25:16 -07:00			`import hashlib`
Update model_server (#164) * Update model server * Delete model_server/.vscode/settings.json * Update loader.py * Fix errors * Update log mode 2024-10-09 18:04:52 -07:00			`import app.commons.constants as const`
feedback 2024-10-31 14:49:03 -07:00			`import random`
Update model_server (#164) * Update model server * Delete model_server/.vscode/settings.json * Update loader.py * Fix errors * Update log mode 2024-10-09 18:04:52 -07:00			`from fastapi import Response`
			`from pydantic import BaseModel`
			`from app.commons.utilities import get_model_server_logger`
add prefill and test 2024-10-30 17:00:30 -07:00			`from typing import Any, Dict, List, Optional`
Update model_server (#164) * Update model server * Delete model_server/.vscode/settings.json * Update loader.py * Fix errors * Update log mode 2024-10-09 18:04:52 -07:00
Add function calling support using bolt-fc-1b (#35) 2024-09-10 14:24:46 -07:00
ensure that we can call the new api.fc.archgw.com url, logging fixes … (#142) * ensure that we can call the new api.fc.archgw.com url, logging fixes and minor cli bug fixes * fixed a bug where model_server printed on terminal after start script stopped running * updating the logo and fixing the website styles * updated the branch with feedback from Co and Adil --------- Co-authored-by: Salman Paracha <salmanparacha@MacBook-Pro-261.local> 2024-10-08 12:40:24 -07:00			`logger = get_model_server_logger()`

add support for default target (#111) * add support for default target * add more fixes 2024-10-02 20:43:16 -07:00
Update model_server (#164) * Update model server * Delete model_server/.vscode/settings.json * Update loader.py * Fix errors * Update log mode 2024-10-09 18:04:52 -07:00			`class Message(BaseModel):`
add prefill and test 2024-10-30 17:00:30 -07:00			`role: Optional[str] = ""`
			`content: Optional[str] = ""`
			`tool_calls: Optional[List[Dict[str, Any]]] = []`
			`tool_call_id: Optional[str] = ""`
Update model_server (#164) * Update model server * Delete model_server/.vscode/settings.json * Update loader.py * Fix errors * Update log mode 2024-10-09 18:04:52 -07:00

			`class ChatMessage(BaseModel):`
			`messages: list[Message]`
			`tools: List[Dict[str, Any]]`

Add function calling support using bolt-fc-1b (#35) 2024-09-10 14:24:46 -07:00
add prefill and test 2024-10-30 17:00:30 -07:00			`class Choice(BaseModel):`
			`message: Message`
fix 2024-10-31 12:51:11 -07:00			`finish_reason: Optional[str] = "stop"`
			`index: Optional[int] = 0`
add prefill and test 2024-10-30 17:00:30 -07:00

			`class ChatCompletionResponse(BaseModel):`
			`choices: List[Choice]`
fix 2024-10-31 12:51:11 -07:00			`model: Optional[str] = "Arch-Function"`
			`created: Optional[str] = ""`
			`id: Optional[str] = ""`
			`object: Optional[str] = "chat_completion"`
add prefill and test 2024-10-30 17:00:30 -07:00

Pass tool call and app function response back in metadata (#193) 2024-10-18 13:25:39 -07:00			`def process_messages(history: list[Message]):`
don't compute embeddings for names and other fixes see description (#126) * serialize tools - 2 * fix int tests * fix int test * fix unit tests 2024-10-05 19:25:16 -07:00			`updated_history = []`
			`for hist in history:`
Pass tool call and app function response back in metadata (#193) 2024-10-18 13:25:39 -07:00			`if hist.tool_calls:`
			`if len(hist.tool_calls) > 1:`
HR agent demo (#206) * commiting my hr_agent branch * updating the HR agent config * pushing to remote * fix hr agent * committing to merge with main * updating to merge from main * updating the demo and model-server-tests to pull from poetry * updating the poetry.lock files * updating based on feedback * updated sysmte prompt for hr_agent --------- Co-authored-by: Salman Paracha <salmanparacha@MacBook-Pro-261.local> Co-authored-by: Adil Hafeez <adil@katanemo.com> 2024-10-23 14:32:40 -07:00			`error_msg = f"Only one tool call is supported, tools counts: {len(hist.tool_calls)}"`
			`logger.error(error_msg)`
			`raise ValueError(error_msg)`
Pass tool call and app function response back in metadata (#193) 2024-10-18 13:25:39 -07:00			`tool_call_str = json.dumps(hist.tool_calls[0]["function"])`
			`updated_history.append(`
			`{`
			`"role": "assistant",`
			`"content": f"<tool_call>\n{tool_call_str}\n</tool_call>",`
			`}`
			`)`
			`elif hist.role == "tool":`
			`updated_history.append(`
			`{`
			`"role": "user",`
			`"content": f"<tool_response>\n{hist.content}\n</tool_response>",`
			`}`
			`)`
			`else:`
			`updated_history.append({"role": hist.role, "content": hist.content})`
don't compute embeddings for names and other fixes see description (#126) * serialize tools - 2 * fix int tests * fix int test * fix unit tests 2024-10-05 19:25:16 -07:00			`return updated_history`

formating and mointoring change (#136) 2024-10-07 15:21:05 -07:00
update fix 2024-11-06 16:16:08 -08:00			`async def chat_completion(req: ChatMessage, res: Response):`
Add function calling support using bolt-fc-1b (#35) 2024-09-10 14:24:46 -07:00			`logger.info("starting request")`
Update model_server (#164) * Update model server * Delete model_server/.vscode/settings.json * Update loader.py * Fix errors * Update log mode 2024-10-09 18:04:52 -07:00
			`tools_encoded = const.arch_function_hanlder._format_system(req.tools)`

send history to bolt fc model (#84) 2024-09-25 12:03:44 -07:00			`messages = [{"role": "system", "content": tools_encoded}]`
Update model_server (#164) * Update model server * Delete model_server/.vscode/settings.json * Update loader.py * Fix errors * Update log mode 2024-10-09 18:04:52 -07:00
Pass tool call and app function response back in metadata (#193) 2024-10-18 13:25:39 -07:00			`updated_history = process_messages(req.messages)`
don't compute embeddings for names and other fixes see description (#126) * serialize tools - 2 * fix int tests * fix int test * fix unit tests 2024-10-05 19:25:16 -07:00			`for message in updated_history:`
			`messages.append({"role": message["role"], "content": message["content"]})`

Update model_server (#164) * Update model server * Delete model_server/.vscode/settings.json * Update loader.py * Fix errors * Update log mode 2024-10-09 18:04:52 -07:00			`client_model_name = const.arch_function_client.models.list().data[0].id`

formating and mointoring change (#136) 2024-10-07 15:21:05 -07:00			`logger.info(`
Update model_server (#164) * Update model server * Delete model_server/.vscode/settings.json * Update loader.py * Fix errors * Update log mode 2024-10-09 18:04:52 -07:00			`f"model_server => arch_function: {client_model_name}, messages: {json.dumps(messages)}"`
formating and mointoring change (#136) 2024-10-07 15:21:05 -07:00			`)`
Update model_server (#164) * Update model server * Delete model_server/.vscode/settings.json * Update loader.py * Fix errors * Update log mode 2024-10-09 18:04:52 -07:00
add prefill and test 2024-10-30 17:00:30 -07:00			`# Retrieve the first token, handling the Stream object carefully`
feedback 2024-10-31 14:49:03 -07:00
update 2024-11-04 10:21:11 -08:00			`try:`
			`resp = const.arch_function_client.chat.completions.create(`
			`messages=messages,`
			`model=client_model_name,`
address cmt 2024-11-07 11:15:03 -08:00			`stream=const.PREFILL_ENABLED,`
update 2024-11-04 10:21:11 -08:00			`extra_body=const.arch_function_generation_params,`
			`)`
			`except Exception as e:`
			`logger.error(f"model_server <= arch_function: error: {e}")`
			`raise`
address comments 2024-11-01 10:43:34 -07:00
address cmt 2024-11-07 11:15:03 -08:00			`if const.PREFILL_ENABLED:`
feedback 2024-10-31 14:49:03 -07:00			`first_token_content = ""`
			`for token in resp:`
			`first_token_content = token.choices[`
add prefill and test 2024-10-30 17:00:30 -07:00			`0`
			`].delta.content.strip() # Clean up the content`
			`if first_token_content: # Break if it's non-empty`
			`break`
feedback 2024-10-31 14:49:03 -07:00
			`# Check if the first token requires tool call handling`
address cmt 2024-11-07 11:15:03 -08:00			`if first_token_content != const.TOOL_CALL_TOKEN:`
feedback 2024-10-31 14:49:03 -07:00			`# Engage pre-filling response if no tool call is indicated`
			`resp.close()`
			`logger.info("Tool call is not found! Engage pre filling")`
address cmt 2024-11-07 11:15:03 -08:00			`prefill_content = random.choice(const.PREFILL_LIST)`
feedback 2024-10-31 14:49:03 -07:00			`messages.append({"role": "assistant", "content": prefill_content})`

			`# Send a new completion request with the updated messages`
address cmt 2024-11-07 11:15:03 -08:00			`# the model will continue the final message in the chat instead of starting a new one`
			`# disable add_generation_prompt which tells the template to add tokens that indicate the start of a bot response.`
address comments 2024-11-01 10:43:34 -07:00			`extra_body = {`
			`**const.arch_function_generation_params,`
			`"continue_final_message": True,`
			`"add_generation_prompt": False,`
			`}`
feedback 2024-10-31 14:49:03 -07:00			`pre_fill_resp = const.arch_function_client.chat.completions.create(`
			`messages=messages,`
			`model=client_model_name,`
			`stream=False,`
address comments 2024-11-01 10:43:34 -07:00			`extra_body=extra_body,`
feedback 2024-10-31 14:49:03 -07:00			`)`
			`full_response = pre_fill_resp.choices[0].message.content`
			`else:`
			`# Initialize full response and iterate over tokens to gather the full response`
			`full_response = first_token_content`
			`for token in resp:`
add prefill and test 2024-10-30 17:00:30 -07:00			`if hasattr(token.choices[0].delta, "content"):`
			`full_response += token.choices[0].delta.content`
feedback 2024-10-31 14:49:03 -07:00			`else:`
add e2e test 2024-11-05 08:42:57 -08:00			`logger.info("Stream is disabled, not engaging pre-filling")`
update 2024-11-04 10:21:11 -08:00			`full_response = resp.choices[0].message.content`
add prefill and test 2024-10-30 17:00:30 -07:00
			`tool_calls = const.arch_function_hanlder.extract_tool_calls(full_response)`
Update model_server (#164) * Update model server * Delete model_server/.vscode/settings.json * Update loader.py * Fix errors * Update log mode 2024-10-09 18:04:52 -07:00
			`if tool_calls:`
add prefill and test 2024-10-30 17:00:30 -07:00			`message = Message(content="", tool_calls=tool_calls)`
			`else:`
			`message = Message(content=full_response, tool_calls=[])`
			`choice = Choice(message=message)`
fix 2024-10-31 12:51:11 -07:00			`chat_completion_response = ChatCompletionResponse(`
			`choices=[choice], model=client_model_name`
			`)`
Update model_server (#164) * Update model server * Delete model_server/.vscode/settings.json * Update loader.py * Fix errors * Update log mode 2024-10-09 18:04:52 -07:00
			`logger.info(`
			`f"model_server <= arch_function: (tools): {json.dumps([tool_call['function'] for tool_call in tool_calls])}"`
			`)`
			`logger.info(`
add prefill and test 2024-10-30 17:00:30 -07:00			`f"model_server <= arch_function: response body: {json.dumps(chat_completion_response.dict())}"`
Update model_server (#164) * Update model server * Delete model_server/.vscode/settings.json * Update loader.py * Fix errors * Update log mode 2024-10-09 18:04:52 -07:00			`)`

add prefill and test 2024-10-30 17:00:30 -07:00			`return chat_completion_response`