use openai standard response in arch-fc and in gradio client (#62)

* use openai standard response in arch-fc and in gradio client also fix code bug in usage * fix int test
2026-07-14 16:22:12 +02:00 · 2024-09-19 12:19:14 -07:00 · 2024-09-19 12:19:14 -07:00 · 2cd5ec5adf
commit 2cd5ec5adf
parent ed6a9139e6
6 changed files with 89 additions and 133 deletions
--- a/chatbot_ui/app/run.py
+++ b/chatbot_ui/app/run.py
@ -1,109 +1,52 @@
+import os
+from openai import OpenAI
 import gradio as gr

-import asyncio
-import httpx
-import async_timeout
+OPEN_API_KEY=os.getenv("OPENAI_API_KEY")
+CHAT_COMPLETION_ENDPOINT = os.getenv("CHAT_COMPLETION_ENDPOINT", "https://api.openai.com/v1")
+MODEL_NAME = os.getenv("MODEL_NAME", "gpt-3.5-turbo")

-from loguru import logger
-from typing import Optional, List
-from pydantic import BaseModel
-from dotenv import load_dotenv
+client = OpenAI(api_key=OPEN_API_KEY, base_url=CHAT_COMPLETION_ENDPOINT)

-import os
-load_dotenv()
+def predict(message, history):
+    # history_openai_format = []
+    # for human, assistant in history:
+    #     history_openai_format.append({"role": "user", "content": human })
+    #     history_openai_format.append({"role": "assistant", "content":assistant})
+    history.append({"role": "user", "content": message})

-OPENAI_API_KEY = os.getenv("OPENAI_API_KEY")
-CHAT_COMPLETION_ENDPOINT = os.getenv("CHAT_COMPLETION_ENDPOINT", "https://api.openai.com/v1/chat/completions")
-
-
-class Message(BaseModel):
-
-    role: str
-    content: str
-    # model is additional state we maintin on client side so that bolt gateway can know which model responded to user prompt
-    model: str
-    resolver: str
-
-async def make_completion(messages:List[Message], nb_retries:int=3, delay:int=120) -> Optional[str]:
-    """
-    Sends a request to the ChatGPT API to retrieve a response based on a list of previous messages.
-    """
-    header = {
-        "Content-Type": "application/json",
-    }
-
-    if OPENAI_API_KEY is not None and OPENAI_API_KEY != "":
-        header["Authorization"] = f"Bearer {OPENAI_API_KEY}"
-
-    if OPENAI_API_KEY is None or OPENAI_API_KEY == "":
-        if CHAT_COMPLETION_ENDPOINT.startswith("https://api.openai.com"):
-          logger.error("No OpenAI API Key found. Please create .env file and set OPENAI_API_KEY env var !")
-          return None
    try:
-        async with async_timeout.timeout(delay=delay):
-            async with httpx.AsyncClient(headers=header) as aio_client:
-                counter = 0
-                keep_loop = True
-                while keep_loop:
-                    logger.debug(f"Chat/Completions Nb Retries : {counter}")
-                    try:
-                        resp = await aio_client.post(
-                            url = CHAT_COMPLETION_ENDPOINT,
-                            json = {
-                                "model": "gpt-3.5-turbo",
-                                "messages": messages
-                            },
-                            timeout=delay
-                        )
-                        logger.debug(f"Status Code : {resp.status_code}")
-                        if resp.status_code == 200:
-                            resp_json = resp.json()
-                            model = resp_json["model"]
-                            msg = {}
-                            msg["role"] = "assistant"
-                            msg["model"] = model
-                            if "resolver_name" in resp_json:
-                                msg["resolver"] = resp_json["resolver_name"]
-                            if "choices" in resp_json:
-                                msg["content"] = resp_json["choices"][0]["message"]["content"]
-                                return msg
-                            elif "message" in resp_json:
-                                msg["content"] = resp_json["message"]["content"]
-                                return msg
-                            keep_loop = False
-                        else:
-                            logger.warning(resp.content)
-                            keep_loop = False
-                    except Exception as e:
-                        logger.error(e)
-                        counter = counter + 1
-                        keep_loop = counter < nb_retries
-    except asyncio.TimeoutError as e:
-        logger.error(f"Timeout {delay} seconds !")
-    return None
+      response = client.chat.completions.create(model='gpt-3.5-turbo',
+        messages= history,
+        temperature=1.0
+      )
+    except Exception as e:
+      print(e)
+      # remove last user message in case of exception
+      history.pop()
+      raise gr.Error("Error with OpenAI API: {}".format(e.message))
+
+    # for chunk in response:
+    #     if chunk.choices[0].delta.content is not None:
+    #           partial_message = partial_message + chunk.choices[0].delta.content
+    #           yield partial_message
+    choices = response.choices
+    message = choices[0].message
+    content = message.content
+    history.append({"role": "assistant", "content": content})
+    history[-1]["model"] = response.model

-async def predict(input, history):
-    """
-    Predict the response of the chatbot and complete a running list of chat history.
-    """
-    history.append({"role": "user", "content": input})
-    response = await make_completion(history)
-    print(response)
-    if response is not None:
-      history.append(response)
    messages = [(history[i]["content"], history[i+1]["content"]) for i in range(0, len(history)-1, 2)]
    return messages, history

-"""
-Gradio Blocks low-level API that allows to create custom web applications (here our chat app)
-"""
-# with fill_height=true the chatbot to fill the height of the page
+
 with gr.Blocks(fill_height=True, css="footer {visibility: hidden}") as demo:
-    logger.info("Starting Demo...")
+    print("Starting Demo...")
    chatbot = gr.Chatbot(label="Bolt Chatbot", scale=1)
    state = gr.State([])
    with gr.Row():
-        txt = gr.Textbox(show_label=False, placeholder="Enter text and press enter", scale=1)
+        txt = gr.Textbox(show_label=False, placeholder="Enter text and press enter", scale=1, autofocus=True)
+
    txt.submit(predict, [txt, state], [chatbot, state])

-demo.launch(server_name="0.0.0.0", server_port=8080)
+demo.launch(server_name="0.0.0.0", server_port=8080, show_error=True)
--- a/envoyfilter/src/stream_context.rs
+++ b/envoyfilter/src/stream_context.rs
@ -21,7 +21,7 @@ use public_types::common_types::open_ai::{
    StreamOptions,
 };
 use public_types::common_types::{
-    BoltFCResponse, BoltFCToolsCall, EmbeddingType, ToolParameter, ToolParameters, ToolsDefinition,
+    BoltFCToolsCall, EmbeddingType, ToolParameter, ToolParameters, ToolsDefinition,
    ZeroShotClassificationRequest, ZeroShotClassificationResponse,
 };
 use public_types::configuration::{Overrides, PromptTarget, PromptType};
@ -427,9 +427,9 @@ impl StreamContext {
        let body_str = String::from_utf8(body).unwrap();
        debug!("function_resolver response str: {:?}", body_str);

-        let mut boltfc_response: BoltFCResponse = serde_json::from_str(&body_str).unwrap();
+        let boltfc_response: ChatCompletionsResponse = serde_json::from_str(&body_str).unwrap();

-        let boltfc_response_str = boltfc_response.message.content.as_ref().unwrap();
+        let boltfc_response_str = boltfc_response.choices[0].message.content.as_ref().unwrap();

        let tools_call_response: BoltFCToolsCall = match serde_json::from_str(boltfc_response_str) {
            Ok(fc_resp) => fc_resp,
@ -439,7 +439,6 @@ impl StreamContext {
                // Let's send the response back to the user to initalize lightweight dialog for parameter collection

                // add resolver name to the response so the client can send the response back to the correct resolver
-                boltfc_response.resolver_name = Some(callout_context.prompt_target_name.unwrap());
                info!("some requred parameters are missing, sending response from Bolt FC back to user for parameter collection: {}", e);
                let bolt_fc_dialogue_message = serde_json::to_string(&boltfc_response).unwrap();
                self.send_http_response(
@ -826,7 +825,7 @@ impl HttpContext for StreamContext {
                    }
                };

-            self.response_tokens += chat_completions_response.usage.completions_tokens;
+            self.response_tokens += chat_completions_response.usage.completion_tokens;
        }

        debug!(
--- a/envoyfilter/tests/integration.rs
+++ b/envoyfilter/tests/integration.rs
@ -9,7 +9,8 @@ use proxy_wasm_test_framework::types::{
    Action, BufferType, LogLevel, MapType, MetricType, ReturnType,
 };
 use public_types::common_types::{
-    open_ai::Message, BoltFCResponse, BoltFCToolsCall, IntOrString, ToolCallDetail,
+    open_ai::{ChatCompletionsResponse, Choice, Message, Usage},
+    BoltFCToolsCall, IntOrString, ToolCallDetail,
 };
 use public_types::{common_types::ZeroShotClassificationResponse, configuration::Configuration};
 use serial_test::serial;
@ -426,16 +427,20 @@ fn request_ratelimited() {
        tool_calls: tool_call_detail,
    };

-    let bolt_fc_resp = BoltFCResponse {
-        model: String::from("test"),
-        message: Message {
-            role: String::from("system"),
-            content: Some(serde_json::to_string(&boltfc_tools_call).unwrap()),
-            model: None,
+    let bolt_fc_resp = ChatCompletionsResponse {
+        usage: Usage {
+            completion_tokens: 0,
        },
-        done_reason: String::from("test"),
-        done: true,
-        resolver_name: None,
+        choices: vec![Choice {
+            finish_reason: "test".to_string(),
+            index: 0,
+            message: Message {
+                role: "system".to_string(),
+                content: Some(serde_json::to_string(&boltfc_tools_call).unwrap()),
+                model: None,
+            },
+        }],
+        model: String::from("test"),
    };

    let bolt_fc_resp_str = serde_json::to_string(&bolt_fc_resp).unwrap();
@ -535,16 +540,20 @@ fn request_not_ratelimited() {
        tool_calls: tool_call_detail,
    };

-    let bolt_fc_resp = BoltFCResponse {
-        model: String::from("test"),
-        message: Message {
-            role: String::from("system"),
-            content: Some(serde_json::to_string(&boltfc_tools_call).unwrap()),
-            model: None,
+    let bolt_fc_resp = ChatCompletionsResponse {
+        usage: Usage {
+            completion_tokens: 0,
        },
-        done_reason: String::from("test"),
-        done: true,
-        resolver_name: None,
+        choices: vec![Choice {
+            finish_reason: "test".to_string(),
+            index: 0,
+            message: Message {
+                role: "system".to_string(),
+                content: Some(serde_json::to_string(&boltfc_tools_call).unwrap()),
+                model: None,
+            },
+        }],
+        model: String::from("test"),
    };

    let bolt_fc_resp_str = serde_json::to_string(&bolt_fc_resp).unwrap();
--- a/function_resolver/app/main.py
+++ b/function_resolver/app/main.py
@ -2,7 +2,7 @@ from fastapi import FastAPI, Response
 from bolt_handler import BoltHandler
 from common import ChatMessage
 import logging
-from ollama import Client
+from openai import OpenAI
 import os

 ollama_endpoint = os.getenv("OLLAMA_ENDPOINT", "localhost")
@ -15,8 +15,12 @@ logger.info(f"using ollama endpoint: {ollama_endpoint}")
 app = FastAPI()
 handler = BoltHandler()

-ollama_client = Client(host=ollama_endpoint)
+client = OpenAI(
+    base_url='http://{}:11434/v1/'.format(ollama_endpoint),

+    # required but ignored
+    api_key='ollama',
+)

@app.get("/healthz")
 async def healthz():
@ -35,6 +39,6 @@ async def chat_completion(req: ChatMessage, res: Response):
    )
    messages.append({"role": "user", "content": req.messages[-1].content})

-    resp = ollama_client.chat(messages=messages, model=ollama_model, stream=False)
+    resp = client.chat.completions.create(messages=messages, model=ollama_model, stream=False)
    logger.info(f"response: {resp}")
    return resp
--- a/function_resolver/requirements.txt
+++ b/function_resolver/requirements.txt
@ -1,4 +1,4 @@
 fastapi
 uvicorn
-ollama
 PyYAML
+openai
--- a/public_types/src/common_types.rs
+++ b/public_types/src/common_types.rs
@ -61,15 +61,6 @@ pub struct ToolsDefinition {
    pub parameters: ToolParameters,
 }

-#[derive(Debug, Clone, Serialize, Deserialize)]
-pub struct BoltFCResponse {
-    pub model: String,
-    pub message: open_ai::Message,
-    pub done_reason: String,
-    pub done: bool,
-    pub resolver_name: Option<String>,
-}
-
 #[derive(Debug, Clone, Serialize, Deserialize)]
 #[serde(untagged)]
 pub enum IntOrString {
@ -118,24 +109,34 @@ pub mod open_ai {
        #[serde(skip_serializing_if = "Option::is_none")]
        pub model: Option<String>,
    }
+
+    #[derive(Debug, Clone, Serialize, Deserialize)]
+    pub struct Choice {
+        pub finish_reason: String,
+        pub index: usize,
+        pub message: Message,
+    }
+
    #[derive(Debug, Clone, Serialize, Deserialize)]
    pub struct ChatCompletionsResponse {
        pub usage: Usage,
+        pub choices: Vec<Choice>,
+        pub model: String
    }

    #[derive(Debug, Clone, Serialize, Deserialize)]
    pub struct Usage {
-        pub completions_tokens: usize,
+        pub completion_tokens: usize,
    }

    #[derive(Debug, Clone, Serialize, Deserialize)]
    pub struct ChatCompletionChunkResponse {
        pub model: String,
-        pub choices: Vec<Choice>,
+        pub choices: Vec<ChunkChoice>,
    }

    #[derive(Debug, Clone, Serialize, Deserialize)]
-    pub struct Choice {
+    pub struct ChunkChoice {
        pub delta: Delta,
        // TODO: could this be an enum?
        pub finish_reason: Option<String>,