From c7f8c2cef93717188927ccbff56046ec0842b9df Mon Sep 17 00:00:00 2001
From: Adil Hafeez <adil.hafeez@gmail.com>
Date: Thu, 16 Jan 2025 16:34:17 -0800
Subject: [PATCH] add demo for ollama

---
 crates/llm_gateway/src/stream_context.rs      | 30 ++++++-----
 demos/currency_exchange_ollama/README.md      |  3 ++
 .../currency_exchange_ollama/arch_config.yaml | 52 +++++++++++++++++++
 .../docker-compose.yaml                       | 21 ++++++++
 demos/currency_exchange_ollama/run_demo.sh    | 47 +++++++++++++++++
 5 files changed, 140 insertions(+), 13 deletions(-)
 create mode 100644 demos/currency_exchange_ollama/README.md
 create mode 100644 demos/currency_exchange_ollama/arch_config.yaml
 create mode 100644 demos/currency_exchange_ollama/docker-compose.yaml
 create mode 100644 demos/currency_exchange_ollama/run_demo.sh

diff --git a/crates/llm_gateway/src/stream_context.rs b/crates/llm_gateway/src/stream_context.rs
index 4df9779e..7169d8cd 100644
--- a/crates/llm_gateway/src/stream_context.rs
+++ b/crates/llm_gateway/src/stream_context.rs
@@ -337,16 +337,18 @@ impl HttpContext for StreamContext {
                     // Record the latency to the latency histogram
                     self.metrics.request_latency.record(duration_ms as u64);
 
-                    // Compute the time per output token
-                    let tpot = duration_ms as u64 / self.response_tokens as u64;
+                    if self.response_tokens > 0 {
+                        // Compute the time per output token
+                        let tpot = duration_ms as u64 / self.response_tokens as u64;
 
-                    debug!("Time per output token: {} milliseconds", tpot);
-                    // Record the time per output token
-                    self.metrics.time_per_output_token.record(tpot);
+                        debug!("Time per output token: {} milliseconds", tpot);
+                        // Record the time per output token
+                        self.metrics.time_per_output_token.record(tpot);
 
-                    debug!("Tokens per second: {}", 1000 / tpot);
-                    // Record the tokens per second
-                    self.metrics.tokens_per_second.record(1000 / tpot);
+                        debug!("Tokens per second: {}", 1000 / tpot.max(1));
+                        // Record tokens per second; tpot can round down to 0, so clamp it to 1
+                        self.metrics.tokens_per_second.record(1000 / tpot.max(1));
+                    }
                 }
                 Err(e) => {
                     warn!("SystemTime error: {:?}", e);
@@ -384,11 +386,13 @@ impl HttpContext for StreamContext {
                             self.llm_provider().name.to_string(),
                         );
 
-                        llm_span.add_event(Event::new(
-                            "time_to_first_token".to_string(),
-                            self.ttft_time.unwrap(),
-                        ));
-                        trace_data.add_span(llm_span);
+                        // The time_to_first_token event is optional, but the span itself
+                        // must always be recorded, so only the event is conditional.
+                        if let Some(ttft_time) = self.ttft_time {
+                            let event = Event::new("time_to_first_token".to_string(), ttft_time);
+                            llm_span.add_event(event);
+                        }
+                        trace_data.add_span(llm_span);
 
                         self.traces_queue.lock().unwrap().push_back(trace_data);
                     }
diff --git a/demos/currency_exchange_ollama/README.md b/demos/currency_exchange_ollama/README.md
new file mode 100644
index 00000000..ef121a75
--- /dev/null
+++ b/demos/currency_exchange_ollama/README.md
@@ -0,0 +1,3 @@
+This demo shows how to use Ollama as the upstream LLM.
+
+Before starting the demo, make sure Ollama is up and running. You can use the command `ollama run llama3.2` to start the Llama 3.2 (3B) model locally on port `11434`.
diff --git a/demos/currency_exchange_ollama/arch_config.yaml b/demos/currency_exchange_ollama/arch_config.yaml
new file mode 100644
index 00000000..d5a006df
--- /dev/null
+++ b/demos/currency_exchange_ollama/arch_config.yaml
@@ -0,0 +1,52 @@
+version: v0.1
+
+listener:
+  address: 0.0.0.0
+  port: 10000
+  message_format: huggingface
+  connect_timeout: 0.005s
+
+llm_providers:
+  - name: local-llama
+    provider: local-llama
+    model: llama3.2
+    endpoint: host.docker.internal:11434
+
+system_prompt: |
+  You are a helpful assistant.
+
+prompt_guards:
+  input_guards:
+    jailbreak:
+      on_exception:
+        message: Looks like you're curious about my abilities, but I can only provide assistance for currency exchange.
+
+prompt_targets:
+  - name: currency_exchange
+    description: Get currency exchange rate from USD to other currencies
+    parameters:
+      - name: currency_symbol
+        description: the currency that needs conversion
+        required: true
+        type: str
+        in_path: true
+    endpoint:
+      name: frankfurther_api
+      path: /v1/latest?base=USD&symbols={currency_symbol}
+    system_prompt: |
+      You are a helpful assistant. Show me the currency symbol you want to convert from USD.
+
+  - name: get_supported_currencies
+    description: Get list of supported currencies for conversion
+    endpoint:
+      name: frankfurther_api
+      path: /v1/currencies
+
+endpoints:
+  frankfurther_api:
+    endpoint: api.frankfurter.dev:443
+    protocol: https
+
+tracing:
+  random_sampling: 100
+  trace_arch_internal: true
diff --git a/demos/currency_exchange_ollama/docker-compose.yaml b/demos/currency_exchange_ollama/docker-compose.yaml
new file mode 100644
index 00000000..32e52c40
--- /dev/null
+++ b/demos/currency_exchange_ollama/docker-compose.yaml
@@ -0,0 +1,21 @@
+services:
+  chatbot_ui:
+    build:
+      context: ../shared/chatbot_ui
+    ports:
+      - "18080:8080"
+    environment:
+      # this is only needed because the sample app runs in the same Docker container environment as archgw
+      - CHAT_COMPLETION_ENDPOINT=http://host.docker.internal:10000/v1
+    extra_hosts:
+      - "host.docker.internal:host-gateway"
+    volumes:
+      - ./arch_config.yaml:/app/arch_config.yaml
+
+  jaeger:
+    build:
+      context: ../shared/jaeger
+    ports:
+      - "16686:16686"
+      - "4317:4317"
+      - "4318:4318"
diff --git a/demos/currency_exchange_ollama/run_demo.sh b/demos/currency_exchange_ollama/run_demo.sh
new file mode 100644
index 00000000..eb47dce6
--- /dev/null
+++ b/demos/currency_exchange_ollama/run_demo.sh
@@ -0,0 +1,47 @@
+#!/bin/bash
+set -e
+
+# Function to start the demo
+start_demo() {
+  # Step 1: Reuse an existing .env file if one is already present
+  if [ -f ".env" ]; then
+    echo ".env file already exists. Skipping creation."
+  else
+    # Step 2: Otherwise create .env from OPENAI_API_KEY (NOTE(review): arch_config.yaml only configures a local Ollama provider - confirm the key is still needed)
+    if [ -z "$OPENAI_API_KEY" ]; then
+      echo "Error: OPENAI_API_KEY environment variable is not set for the demo."
+      exit 1
+    fi
+
+    echo "Creating .env file..."
+    echo "OPENAI_API_KEY=$OPENAI_API_KEY" > .env
+    echo ".env file created with OPENAI_API_KEY."
+  fi
+
+  # Step 3: Start the Arch gateway with this demo's config
+  echo "Starting Arch with arch_config.yaml..."
+  archgw up arch_config.yaml
+
+  # Step 4: Start the Docker Compose services (chatbot UI and Jaeger)
+  echo "Starting chatbot UI and Jaeger using Docker Compose..."
+  docker compose up -d  # Run in detached mode
+}
+
+# Function to stop the demo
+stop_demo() {
+  # Step 1: Stop the Docker Compose services (chatbot UI and Jaeger)
+  echo "Stopping chatbot UI and Jaeger using Docker Compose..."
+  docker compose down
+
+  # Step 2: Stop the Arch gateway
+  echo "Stopping Arch..."
+  archgw down
+}
+
+# Main script logic
+if [ "$1" == "down" ]; then
+  stop_demo
+else
+  # Any other argument (or no argument at all) brings the demo up
+  start_demo
+fi