From c7f8c2cef93717188927ccbff56046ec0842b9df Mon Sep 17 00:00:00 2001
From: Adil Hafeez
Date: Thu, 16 Jan 2025 16:34:17 -0800
Subject: [PATCH] add demo for ollama

---
Review notes (this section is ignored by git am / git apply):

 * stream_context.rs, metrics: the time per output token is an integer
   division, so it truncates to 0 whenever the response took less than
   1 ms per token. "1000 / tpot" would then panic with a division by
   zero, so the tokens-per-second metric is only recorded when tpot > 0.
 * stream_context.rs, tracing: the LLM span is now always added to the
   trace; only the "time_to_first_token" event depends on ttft_time
   being set. Previously a missing ttft_time dropped the whole span and
   queued an empty trace.
 * run_demo.sh: the OPENAI_API_KEY / .env bootstrap is removed, because
   the only LLM provider in arch_config.yaml is the local ollama model.
 * docker-compose.yaml / README.md: comment typo and wording fixes.
 * Leading whitespace of the Rust context lines was reconstructed; if it
   does not match the tree, apply with "git apply --ignore-whitespace"
   and run rustfmt. Blob hashes on the "index" lines are carried over
   from the original patch and are not authoritative.

 crates/llm_gateway/src/stream_context.rs      | 34 ++++++++----
 demos/currency_exchange_ollama/README.md      |  3 ++
 .../currency_exchange_ollama/arch_config.yaml | 52 +++++++++++++++++++
 .../docker-compose.yaml                       | 21 ++++++++
 demos/currency_exchange_ollama/run_demo.sh    | 35 +++++++++++++
 5 files changed, 133 insertions(+), 12 deletions(-)
 create mode 100644 demos/currency_exchange_ollama/README.md
 create mode 100644 demos/currency_exchange_ollama/arch_config.yaml
 create mode 100644 demos/currency_exchange_ollama/docker-compose.yaml
 create mode 100644 demos/currency_exchange_ollama/run_demo.sh

diff --git a/crates/llm_gateway/src/stream_context.rs b/crates/llm_gateway/src/stream_context.rs
index 4df9779e..7169d8cd 100644
--- a/crates/llm_gateway/src/stream_context.rs
+++ b/crates/llm_gateway/src/stream_context.rs
@@ -337,16 +337,22 @@ impl HttpContext for StreamContext {
                     // Record the latency to the latency histogram
                     self.metrics.request_latency.record(duration_ms as u64);
 
-                    // Compute the time per output token
-                    let tpot = duration_ms as u64 / self.response_tokens as u64;
+                    if self.response_tokens > 0 {
+                        // Compute the time per output token
+                        let tpot = duration_ms as u64 / self.response_tokens as u64;
 
-                    debug!("Time per output token: {} milliseconds", tpot);
-                    // Record the time per output token
-                    self.metrics.time_per_output_token.record(tpot);
+                        debug!("Time per output token: {} milliseconds", tpot);
+                        // Record the time per output token
+                        self.metrics.time_per_output_token.record(tpot);
 
-                    debug!("Tokens per second: {}", 1000 / tpot);
-                    // Record the tokens per second
-                    self.metrics.tokens_per_second.record(1000 / tpot);
+                        // tpot truncates to 0 when the response took less than
+                        // 1 ms per token; skip the rate to avoid dividing by zero
+                        if tpot > 0 {
+                            debug!("Tokens per second: {}", 1000 / tpot);
+                            // Record the tokens per second
+                            self.metrics.tokens_per_second.record(1000 / tpot);
+                        }
+                    }
                 }
                 Err(e) => {
                     warn!("SystemTime error: {:?}", e);
@@ -384,11 +390,15 @@ impl HttpContext for StreamContext {
                             self.llm_provider().name.to_string(),
                         );
 
-                        llm_span.add_event(Event::new(
-                            "time_to_first_token".to_string(),
-                            self.ttft_time.unwrap(),
-                        ));
+                        // Attach the TTFT event only when a first token was observed;
+                        // the span itself is always recorded.
+                        if let Some(ttft_time) = self.ttft_time {
+                            llm_span.add_event(Event::new(
+                                "time_to_first_token".to_string(),
+                                ttft_time,
+                            ));
+                        }
                         trace_data.add_span(llm_span);
 
                         self.traces_queue.lock().unwrap().push_back(trace_data);
                     }
diff --git a/demos/currency_exchange_ollama/README.md b/demos/currency_exchange_ollama/README.md
new file mode 100644
index 00000000..ef121a75
--- /dev/null
+++ b/demos/currency_exchange_ollama/README.md
@@ -0,0 +1,3 @@
+This demo shows how you can use ollama as an upstream LLM.
+
+Before you start the demo, please make sure you have ollama up and running. You can use the command `ollama run llama3.2` to start the llama 3.2 (3b) model locally at port `11434`.
diff --git a/demos/currency_exchange_ollama/arch_config.yaml b/demos/currency_exchange_ollama/arch_config.yaml
new file mode 100644
index 00000000..d5a006df
--- /dev/null
+++ b/demos/currency_exchange_ollama/arch_config.yaml
@@ -0,0 +1,52 @@
+version: v0.1
+
+listener:
+  address: 0.0.0.0
+  port: 10000
+  message_format: huggingface
+  connect_timeout: 0.005s
+
+llm_providers:
+  - name: local-llama
+    provider: local-llama
+    model: llama3.2
+    endpoint: host.docker.internal:11434
+
+system_prompt: |
+  You are a helpful assistant.
+
+prompt_guards:
+  input_guards:
+    jailbreak:
+      on_exception:
+        message: Looks like you're curious about my abilities, but I can only provide assistance for currency exchange.
+
+prompt_targets:
+  - name: currency_exchange
+    description: Get currency exchange rate from USD to other currencies
+    parameters:
+      - name: currency_symbol
+        description: the currency that needs conversion
+        required: true
+        type: str
+        in_path: true
+    endpoint:
+      name: frankfurther_api
+      path: /v1/latest?base=USD&symbols={currency_symbol}
+    system_prompt: |
+      You are a helpful assistant. Show me the currency symbol you want to convert from USD.
+
+  - name: get_supported_currencies
+    description: Get list of supported currencies for conversion
+    endpoint:
+      name: frankfurther_api
+      path: /v1/currencies
+
+endpoints:
+  frankfurther_api:
+    endpoint: api.frankfurter.dev:443
+    protocol: https
+
+tracing:
+  random_sampling: 100
+  trace_arch_internal: true
diff --git a/demos/currency_exchange_ollama/docker-compose.yaml b/demos/currency_exchange_ollama/docker-compose.yaml
new file mode 100644
index 00000000..32e52c40
--- /dev/null
+++ b/demos/currency_exchange_ollama/docker-compose.yaml
@@ -0,0 +1,21 @@
+services:
+  chatbot_ui:
+    build:
+      context: ../shared/chatbot_ui
+    ports:
+      - "18080:8080"
+    environment:
+      # this is only because we are running the sample app in the same docker container environment as archgw
+      - CHAT_COMPLETION_ENDPOINT=http://host.docker.internal:10000/v1
+    extra_hosts:
+      - "host.docker.internal:host-gateway"
+    volumes:
+      - ./arch_config.yaml:/app/arch_config.yaml
+
+  jaeger:
+    build:
+      context: ../shared/jaeger
+    ports:
+      - "16686:16686"
+      - "4317:4317"
+      - "4318:4318"
diff --git a/demos/currency_exchange_ollama/run_demo.sh b/demos/currency_exchange_ollama/run_demo.sh
new file mode 100644
index 00000000..eb47dce6
--- /dev/null
+++ b/demos/currency_exchange_ollama/run_demo.sh
@@ -0,0 +1,35 @@
+#!/bin/bash
+set -e
+
+# Function to start the demo
+start_demo() {
+  # No .env / OPENAI_API_KEY is needed here: the only LLM provider in
+  # arch_config.yaml is the local ollama model, which takes no access key.
+
+  # Step 1: Start Arch
+  echo "Starting Arch with arch_config.yaml..."
+  archgw up arch_config.yaml
+
+  # Step 2: Start developer services
+  echo "Starting Network Agent using Docker Compose..."
+  docker compose up -d  # Run in detached mode
+}
+
+# Function to stop the demo
+stop_demo() {
+  # Step 1: Stop Docker Compose services
+  echo "Stopping Network Agent using Docker Compose..."
+  docker compose down
+
+  # Step 2: Stop Arch
+  echo "Stopping Arch..."
+  archgw down
+}
+
+# Main script logic
+if [ "$1" == "down" ]; then
+  stop_demo
+else
+  # Default action is to bring the demo up
+  start_demo
+fi