diff --git a/chatbot_ui/.vscode/launch.json b/chatbot_ui/.vscode/launch.json
index 81605062..d08bb1e4 100644
--- a/chatbot_ui/.vscode/launch.json
+++ b/chatbot_ui/.vscode/launch.json
@@ -11,6 +11,14 @@
             "request": "launch",
             "program": "run.py",
             "console": "integratedTerminal",
+        },
+        {
+            "name": "chatbot-ui streaming",
+            "cwd": "${workspaceFolder}/app",
+            "type": "debugpy",
+            "request": "launch",
+            "program": "run_stream.py",
+            "console": "integratedTerminal",
         }
     ]
 }
diff --git a/chatbot_ui/app/run_stream.py b/chatbot_ui/app/run_stream.py
new file mode 100644
index 00000000..dbcf0df3
--- /dev/null
+++ b/chatbot_ui/app/run_stream.py
@@ -0,0 +1,29 @@
+# copied from https://www.gradio.app/guides/creating-a-chatbot-fast#a-streaming-example-using-openai
+
+import os
+from openai import OpenAI
+import gradio as gr
+
+api_key = os.getenv("OPENAI_API_KEY")
+
+client = OpenAI(api_key=api_key)
+
+def predict(message, history):
+    history_openai_format = []
+    for human, assistant in history:
+        history_openai_format.append({"role": "user", "content": human })
+        history_openai_format.append({"role": "assistant", "content":assistant})
+    history_openai_format.append({"role": "user", "content": message})
+
+    response = client.chat.completions.create(model='gpt-3.5-turbo',
+                                              messages= history_openai_format,
+                                              temperature=1.0,
+                                              stream=True)
+
+    partial_message = ""
+    for chunk in response:
+        if chunk.choices[0].delta.content is not None:
+            partial_message = partial_message + chunk.choices[0].delta.content
+            yield partial_message
+
+gr.ChatInterface(predict).launch(server_name="0.0.0.0", server_port=8081)