mirror of
https://github.com/katanemo/plano.git
synced 2026-06-17 15:25:17 +02:00
add demo for ollama
This commit is contained in:
parent
2928b7630f
commit
c7f8c2cef9
5 changed files with 140 additions and 13 deletions
|
|
@ -337,16 +337,18 @@ impl HttpContext for StreamContext {
|
|||
// Record the latency to the latency histogram
|
||||
self.metrics.request_latency.record(duration_ms as u64);
|
||||
|
||||
// Compute the time per output token
|
||||
let tpot = duration_ms as u64 / self.response_tokens as u64;
|
||||
if self.response_tokens > 0 {
|
||||
// Compute the time per output token
|
||||
let tpot = duration_ms as u64 / self.response_tokens as u64;
|
||||
|
||||
debug!("Time per output token: {} milliseconds", tpot);
|
||||
// Record the time per output token
|
||||
self.metrics.time_per_output_token.record(tpot);
|
||||
debug!("Time per output token: {} milliseconds", tpot);
|
||||
// Record the time per output token
|
||||
self.metrics.time_per_output_token.record(tpot);
|
||||
|
||||
debug!("Tokens per second: {}", 1000 / tpot);
|
||||
// Record the tokens per second
|
||||
self.metrics.tokens_per_second.record(1000 / tpot);
|
||||
debug!("Tokens per second: {}", 1000 / tpot);
|
||||
// Record the tokens per second
|
||||
self.metrics.tokens_per_second.record(1000 / tpot);
|
||||
}
|
||||
}
|
||||
Err(e) => {
|
||||
warn!("SystemTime error: {:?}", e);
|
||||
|
|
@ -384,11 +386,13 @@ impl HttpContext for StreamContext {
|
|||
self.llm_provider().name.to_string(),
|
||||
);
|
||||
|
||||
llm_span.add_event(Event::new(
|
||||
"time_to_first_token".to_string(),
|
||||
self.ttft_time.unwrap(),
|
||||
));
|
||||
trace_data.add_span(llm_span);
|
||||
if self.ttft_time.is_some() {
|
||||
llm_span.add_event(Event::new(
|
||||
"time_to_first_token".to_string(),
|
||||
self.ttft_time.unwrap(),
|
||||
));
|
||||
trace_data.add_span(llm_span);
|
||||
}
|
||||
|
||||
self.traces_queue.lock().unwrap().push_back(trace_data);
|
||||
}
|
||||
|
|
|
|||
3
demos/currency_exchange_ollama/README.md
Normal file
3
demos/currency_exchange_ollama/README.md
Normal file
|
|
@ -0,0 +1,3 @@
|
|||
This demo shows how you can use Ollama as the upstream LLM.
|
||||
|
||||
Before you start the demo, please make sure Ollama is up and running. You can use the command `ollama run llama3.2` to start the Llama 3.2 (3B) model locally on port `11434`.
|
||||
52
demos/currency_exchange_ollama/arch_config.yaml
Normal file
52
demos/currency_exchange_ollama/arch_config.yaml
Normal file
|
|
@ -0,0 +1,52 @@
|
|||
version: v0.1
|
||||
|
||||
listener:
|
||||
address: 0.0.0.0
|
||||
port: 10000
|
||||
message_format: huggingface
|
||||
connect_timeout: 0.005s
|
||||
|
||||
llm_providers:
|
||||
- name: local-llama
|
||||
provider: local-llama
|
||||
model: llama3.2
|
||||
endpoint: host.docker.internal:11434
|
||||
|
||||
system_prompt: |
|
||||
You are a helpful assistant.
|
||||
|
||||
prompt_guards:
|
||||
input_guards:
|
||||
jailbreak:
|
||||
on_exception:
|
||||
message: Looks like you're curious about my abilities, but I can only provide assistance for currency exchange.
|
||||
|
||||
prompt_targets:
|
||||
- name: currency_exchange
|
||||
description: Get currency exchange rate from USD to other currencies
|
||||
parameters:
|
||||
- name: currency_symbol
|
||||
description: the currency that needs conversion
|
||||
required: true
|
||||
type: str
|
||||
in_path: true
|
||||
endpoint:
|
||||
name: frankfurther_api
|
||||
path: /v1/latest?base=USD&symbols={currency_symbol}
|
||||
system_prompt: |
|
||||
You are a helpful assistant. Show me the currency symbol you want to convert from USD.
|
||||
|
||||
- name: get_supported_currencies
|
||||
description: Get list of supported currencies for conversion
|
||||
endpoint:
|
||||
name: frankfurther_api
|
||||
path: /v1/currencies
|
||||
|
||||
endpoints:
|
||||
frankfurther_api:
|
||||
endpoint: api.frankfurter.dev:443
|
||||
protocol: https
|
||||
|
||||
tracing:
|
||||
random_sampling: 100
|
||||
trace_arch_internal: true
|
||||
21
demos/currency_exchange_ollama/docker-compose.yaml
Normal file
21
demos/currency_exchange_ollama/docker-compose.yaml
Normal file
|
|
@ -0,0 +1,21 @@
|
|||
services:
|
||||
chatbot_ui:
|
||||
build:
|
||||
context: ../shared/chatbot_ui
|
||||
ports:
|
||||
- "18080:8080"
|
||||
environment:
|
||||
# this is only because we are running the sample app in the same docker container environment as archgw
|
||||
- CHAT_COMPLETION_ENDPOINT=http://host.docker.internal:10000/v1
|
||||
extra_hosts:
|
||||
- "host.docker.internal:host-gateway"
|
||||
volumes:
|
||||
- ./arch_config.yaml:/app/arch_config.yaml
|
||||
|
||||
jaeger:
|
||||
build:
|
||||
context: ../shared/jaeger
|
||||
ports:
|
||||
- "16686:16686"
|
||||
- "4317:4317"
|
||||
- "4318:4318"
|
||||
47
demos/currency_exchange_ollama/run_demo.sh
Normal file
47
demos/currency_exchange_ollama/run_demo.sh
Normal file
|
|
@ -0,0 +1,47 @@
|
|||
#!/bin/bash
|
||||
set -e
|
||||
|
||||
# Function to start the demo
|
||||
start_demo() {
  # Bring the demo up: optionally bootstrap a .env file, start the Arch
  # gateway with this demo's config, then start the Docker Compose services.

  # Step 1: Create .env only when it is missing AND a key was supplied.
  # This demo's arch_config.yaml declares only a local Ollama provider with no
  # access key, so a missing OPENAI_API_KEY is reported but is not fatal.
  if [ -f ".env" ]; then
    echo ".env file already exists. Skipping creation."
  elif [ -n "${OPENAI_API_KEY:-}" ]; then
    echo "Creating .env file..."
    echo "OPENAI_API_KEY=$OPENAI_API_KEY" > .env
    echo ".env file created with OPENAI_API_KEY."
  else
    echo "OPENAI_API_KEY is not set. Skipping .env creation (not required for the local Ollama provider)."
  fi

  # Step 2: Start Arch
  echo "Starting Arch with arch_config.yaml..."
  archgw up arch_config.yaml

  # Step 3: Start developer services (chatbot_ui and jaeger in docker-compose.yaml)
  echo "Starting chatbot UI and Jaeger using Docker Compose..."
  docker compose up -d  # Run in detached mode
}
|
||||
|
||||
# Function to stop the demo
|
||||
stop_demo() {
  # Tear the demo down in the reverse order of start-up: first the Docker
  # Compose services, then the Arch gateway.

  # Step 1: Stop Docker Compose services (chatbot_ui and jaeger in docker-compose.yaml)
  echo "Stopping chatbot UI and Jaeger using Docker Compose..."
  docker compose down

  # Step 2: Stop Arch
  echo "Stopping Arch..."
  archgw down
}
|
||||
|
||||
# Main script logic
|
||||
# Dispatch on the first argument: exactly "down" stops the demo; any other
# value, or no argument at all, brings the demo up.
case "$1" in
  down)
    stop_demo
    ;;
  *)
    start_demo
    ;;
esac
|
||||
Loading…
Add table
Add a link
Reference in a new issue