diff --git a/arch/src/stream_context.rs b/arch/src/stream_context.rs
index 5f5b9e4d..9824ede1 100644
--- a/arch/src/stream_context.rs
+++ b/arch/src/stream_context.rs
@@ -830,7 +830,7 @@ impl StreamContext {
         };
 
         let final_prompt = format!(
-            "{}\nhere is context: {}",
+            "{}\ncontext: {}",
             user_message.content.unwrap(),
             app_function_call_response_str
         );
diff --git a/demos/function_calling/Arch-Function-Calling-1.5B-Q4_K_M.model_file b/demos/function_calling/Arch-Function-Calling-1.5B-Q4_K_M.model_file
deleted file mode 100644
index ea4c5d8d..00000000
--- a/demos/function_calling/Arch-Function-Calling-1.5B-Q4_K_M.model_file
+++ /dev/null
@@ -1,24 +0,0 @@
-FROM Arch-Function-Calling-1.5B-Q4_K_M.gguf
-
-# Set parameters for response generation
-PARAMETER num_ctx 4096
-PARAMETER num_predict 1024
-PARAMETER temperature 0.001
-PARAMETER top_p 1.0
-PARAMETER top_k 50
-PARAMETER repeat_penalty 1.0
-PARAMETER stop "<|im_start|>"
-PARAMETER stop "<|im_end|>"
-
-# Set the random number seed to use for generation
-PARAMETER seed 42
-
-# Set the prompt template to be passed into the model
-TEMPLATE """
-{{- if .System }}<|im_start|>system
-{{ .System }}<|im_end|>
-{{ end }}{{ if .Prompt }}
-<|im_start|>user
-{{ .Prompt }}<|im_end|>
-{{ end }}<|im_start|>assistant
-{{ .Response }}<|im_end|>"""
diff --git a/demos/function_calling/Arch-Function-Calling-3B-Q4_K_M.model_file b/demos/function_calling/Arch-Function-Calling-3B-Q4_K_M.model_file
deleted file mode 100644
index 2d8619a4..00000000
--- a/demos/function_calling/Arch-Function-Calling-3B-Q4_K_M.model_file
+++ /dev/null
@@ -1,24 +0,0 @@
-FROM Arch-Function-Calling-3B-Q4_K_M.gguf
-
-# Set parameters for response generation
-PARAMETER num_ctx 4096
-PARAMETER num_predict 1024
-PARAMETER temperature 0.001
-PARAMETER top_p 1.0
-PARAMETER top_k 50
-PARAMETER repeat_penalty 1.0
-PARAMETER stop "<|im_start|>"
-PARAMETER stop "<|im_end|>"
-
-# Set the random number seed to use for generation
-PARAMETER seed 42
-
-# Set the prompt template to be passed into the model
-TEMPLATE """
-{{- if .System }}<|im_start|>system
-{{ .System }}<|im_end|>
-{{ end }}{{ if .Prompt }}
-<|im_start|>user
-{{ .Prompt }}<|im_end|>
-{{ end }}<|im_start|>assistant
-{{ .Response }}<|im_end|>"""
diff --git a/demos/insurance_agent/Dockerfile b/demos/insurance_agent/Dockerfile
new file mode 100644
index 00000000..95855991
--- /dev/null
+++ b/demos/insurance_agent/Dockerfile
@@ -0,0 +1,19 @@
+FROM python:3 AS base
+
+FROM base AS builder
+
+WORKDIR /src
+
+COPY requirements.txt /src/
+RUN pip install --prefix=/runtime --force-reinstall -r requirements.txt
+
+COPY . /src
+
+FROM python:3-slim AS output
+
+COPY --from=builder /runtime /usr/local
+
+COPY . /app
+WORKDIR /app
+
+CMD ["uvicorn", "insurance_agent_main:app", "--host", "0.0.0.0", "--port", "80", "--log-level", "info"]
diff --git a/demos/insurance_agent/arch_confirg.yaml b/demos/insurance_agent/arch_confirg.yaml
index 48267d0b..c6ea99fe 100644
--- a/demos/insurance_agent/arch_confirg.yaml
+++ b/demos/insurance_agent/arch_confirg.yaml
@@ -1,15 +1,16 @@
-version: "0.1-beta"
+version: v0.1
 listener:
   address: 127.0.0.1
   port: 8080 #If you configure port 443, you'll need to update the listener with tls_certificates
   message_format: huggingface
 
 system_prompt: |
-  You are an insurance assistant that just offers guidance related to car, boat, rental and home insurnace only.
+  You are an insurance assistant that offers guidance related to car, boat, rental and home insurance only.
   Please be pricese and summarize based on the context provided.
 
 llm_providers:
-  - name: "OpenAI"
-    access_key: $OPEN_AI_KEY
+  - name: OpenAI
+    provider: openai
+    access_key: OPENAI_API_KEY
     model: gpt-4o
     default: true
@@ -19,16 +20,16 @@ endpoints:
     # value could be ip address or a hostname with port
     # this could also be a list of endpoints for load balancing
    # for example endpoint: [ ip1:port, ip2:port ]
-    endpoint: "127.0.0.1:80"
+    endpoint: host.docker.internal:18083
     # max time to wait for a connection to be established
-    connect_timeout: 500ms
+    connect_timeout: 0.05s
 
 prompt_targets:
   - name: policy_qa
     endpoint:
       name: app_server
       path: /policy/qa
-    description: "This method handles Q/A related to general issues in insurance. It forwards the conversation to the OpenAI client via a local proxy and returns the response."
+    description: Handle general Q/A related to insurance.
     default: true
 
   - name: get_policy_coverage
@@ -39,51 +40,61 @@ prompt_targets:
     parameters:
       - name: policy_type
         type: str
-        description: The
-        default: 'car'
+        description: The type of policy, one of car, boat, house, or motorcycle.
+        default: car
         required: true
 
   - name: initiate_policy
     endpoint:
       name: app_server
       path: /policy/initiate
-    description: Initiate policy coverage for a car, boat, house, or motorcycle.
+    description: Start policy coverage for a car, boat, motorcycle, or house.
     parameters:
       - name: policy_type
         type: str
-        description: Field definition from Pydantic model. Requires fixes PolicyRequest
+        description: The type of policy, one of car, boat, house, or motorcycle.
+        default: car
+        required: true
+      - name: deductible
+        type: float
+        description: The deductible amount set on the policy.
         required: true
-      - name: details
-        type: Unknown
-        description: Field definition from Pydantic model. Requires fixes PolicyRequest
-        required: false
 
   - name: update_claim
     endpoint:
       name: app_server
       path: /policy/claim
-    description: Update the status or details of a claim.
+    description: Update the notes on the claim
    parameters:
       - name: claim_id
-        type: int
-        description: Field definition from Pydantic model. Requires fixes ClaimUpdate
-        required: true
-      - name: update
         type: str
-        description: Field definition from Pydantic model. Requires fixes ClaimUpdate
+        description: The claim number.
+        required: true
+      - name: notes
+        type: str
+        description: Notes about the claim for your adjuster to see.
         required: false
 
   - name: update_deductible
     endpoint:
       name: app_server
       path: /policy/deductible
-    description: Update the deductible amount for a specific policy.
+    description: Update the deductible amount for a specific policy coverage.
     parameters:
       - name: policy_id
-        type: int
-        description: Field definition from Pydantic model. Requires fixes DeductibleUpdate
+        type: str
+        description: The ID of the policy.
         required: true
-      - name: new_deductible
+      - name: deductible
         type: float
-        description: Field definition from Pydantic model. Requires fixes DeductibleUpdate
-        required: false
+        description: The deductible amount set on the policy.
+        required: true
+
+ratelimits:
+  - model: gpt-4
+    selector:
+      key: selector-key
+      value: selector-value
+    limit:
+      tokens: 1
+      unit: minute
diff --git a/demos/insurance_agent/docker-compose.yaml b/demos/insurance_agent/docker-compose.yaml
new file mode 100644
index 00000000..c5ca405e
--- /dev/null
+++ b/demos/insurance_agent/docker-compose.yaml
@@ -0,0 +1,22 @@
+services:
+  api_server:
+    build:
+      context: .
+      dockerfile: Dockerfile
+    ports:
+      - "18083:80"
+    healthcheck:
+      test: ["CMD", "curl", "http://localhost:80/healthz"]
+      interval: 5s
+      retries: 20
+
+  chatbot_ui:
+    build:
+      context: ../../chatbot_ui
+      dockerfile: Dockerfile
+    ports:
+      - "18090:8080"
+    environment:
+      - OPENAI_API_KEY=${OPENAI_API_KEY:?error}
+      - MISTRAL_API_KEY=${MISTRAL_API_KEY:?error}
+      - CHAT_COMPLETION_ENDPOINT=http://host.docker.internal:10000/v1
diff --git a/demos/insurance_agent/insurance_agent_main.py b/demos/insurance_agent/insurance_agent_main.py
index 65068adb..6688da12 100644
--- a/demos/insurance_agent/insurance_agent_main.py
+++ b/demos/insurance_agent/insurance_agent_main.py
@@ -1,28 +1,24 @@
-import openai
 from fastapi import FastAPI, HTTPException
-from pydantic import BaseModel
-from typing import Optional
+from pydantic import BaseModel, Field
 
 app = FastAPI()
-openai.api_base = "http://127.0.0.1:10000/v1" # Local proxy
 
-# Data models
+class Conversation(BaseModel):
+    arch_messages: list
 
 class PolicyCoverageRequest(BaseModel):
     policy_type: str = Field(..., description="The type of a policy held by the customer For, e.g. car, boat, house, motorcycle)")
 
-class PolicyRequest(BaseModel):
-    policy_type: str = Field(..., description="The type of a policy held by the customer For, e.g. car, boat, house, motorcycle)")
-    details: str # Additional details like model, year, etc.
+class PolicyInitiateRequest(PolicyCoverageRequest):
+    deductible: float = Field(..., description="The deductible amount set on the policy")
 
 class ClaimUpdate(BaseModel):
-    policy_id: int
-    claim_id: int
-    update: str # Status or details of the claim
+    claim_id: str
+    notes: str  # Notes about the claim
 
 class DeductibleUpdate(BaseModel):
-    policy_id: int
-    new_deductible: float
+    policy_id: str
+    deductible: float
 
 class CoverageResponse(BaseModel):
     policy_type: str
@@ -53,14 +49,14 @@ async def get_policy_coverage(req: PolicyCoverageRequest):
 
 # Initiate policy coverage
 @app.post("/policy/initiate")
-async def initiate_policy(policy_request: PolicyRequest):
+async def initiate_policy(policy_request: PolicyInitiateRequest):
     """
     Initiate policy coverage for a car, boat, house, or motorcycle.
     """
     if policy_request.policy_type not in ["car", "boat", "house", "motorcycle"]:
         raise HTTPException(status_code=400, detail="Invalid policy type")
 
-    return {"message": f"Policy initiated for {policy_request.policy_type}", "details": policy_request.details}
+    return {"message": f"Policy initiated for {policy_request.policy_type}", "deductible": policy_request.deductible}
 
 # Update claim details
 @app.post("/policy/claim")
@@ -69,8 +65,8 @@ async def update_claim(req: ClaimUpdate):
     Update the status or details of a claim.
""" # For simplicity, this is a mock update response - return {"message": f"Claim {claim_update.claim_id} for policy {claim_update.policy_id} has been updated", - "update": claim_update.update} + return {"message": f"Claim {claim_update.claim_id} for policy {claim_update.claim_id} has been updated", + "update": claim_update.notes} # Update deductible amount @app.post("/policy/deductible") @@ -80,43 +76,31 @@ async def update_deductible(deductible_update: DeductibleUpdate): """ # For simplicity, this is a mock update response return {"message": f"Deductible for policy {deductible_update.policy_id} has been updated", - "new_deductible": deductible_update.new_deductible} + "new_deductible": deductible_update.deductible} # Post method for policy Q/A @app.post("/policy/qa") -async def policy_qa(): +async def policy_qa(conversation: Conversation): """ This method handles Q/A related to general issues in insurance. It forwards the conversation to the OpenAI client via a local proxy and returns the response. """ - try: - # Get the latest user message from the conversation - user_message = conversation.messages[-1].content # Assuming the last message is from the user - - # Call the OpenAI API through the Python client - response = openai.Completion.create( - model="gpt-4o", # Replace with the model you want to use - prompt=user_message, - max_tokens=150 - ) - - # Extract the response text from OpenAI - completion = response.choices[0].text.strip() - - # Build the assistant's response message - assistant_message = Message(role="assistant", content=completion) - - # Append the assistant's response to the conversation and return it - updated_conversation = Conversation( - messages=conversation.messages + [assistant_message] - ) - - return updated_conversation - - except openai.error.OpenAIError as e: - raise HTTPException(status_code=500, detail=f"LLM error: {str(e)}") - except Exception as e: - raise HTTPException(status_code=500, detail=f"Error: {str(e)}") + return { + "choices": [ + { + "message": { + "role": "assistant", + "content": "I am a helpful insurance agent, and can only help with insurance things" + }, + "finish_reason": "completed", + "index": 0 + } + ], + "model": "insurance_agent", + "usage": { + "completion_tokens": 0 + } + } # Run the app using: # uvicorn main:app --reload diff --git a/demos/insurance_agent/requirements.txt b/demos/insurance_agent/requirements.txt new file mode 100644 index 00000000..6703613f --- /dev/null +++ b/demos/insurance_agent/requirements.txt @@ -0,0 +1,4 @@ +fastapi +uvicorn +pydantic +openai