diff --git a/demos/currency_exchange/arch_config.yaml b/demos/currency_exchange/arch_config.yaml index 64e0f253..6f2a5b7e 100644 --- a/demos/currency_exchange/arch_config.yaml +++ b/demos/currency_exchange/arch_config.yaml @@ -7,9 +7,9 @@ listener: connect_timeout: 0.005s llm_providers: - - name: gpt-4o + - name: openai-gpt-4o access_key: $OPENAI_API_KEY - provider: openai + provider_interface: openai model: gpt-4o system_prompt: | diff --git a/demos/hr_agent/arch_config.yaml b/demos/hr_agent/arch_config.yaml index e7bf60f2..09264821 100644 --- a/demos/hr_agent/arch_config.yaml +++ b/demos/hr_agent/arch_config.yaml @@ -7,7 +7,7 @@ listener: # Centralized way to manage LLMs, manage keys, retry logic, failover and limits in a central way llm_providers: - name: OpenAI - provider: openai + provider_interface: openai access_key: $OPENAI_API_KEY model: gpt-4o-mini default: true diff --git a/demos/insurance_agent/arch_config.yaml b/demos/insurance_agent/arch_config.yaml index ce8a0088..9e5c6ed3 100644 --- a/demos/insurance_agent/arch_config.yaml +++ b/demos/insurance_agent/arch_config.yaml @@ -9,7 +9,7 @@ system_prompt: | llm_providers: - name: OpenAI - provider: openai + provider_interface: openai access_key: $OPENAI_API_KEY model: gpt-4o default: true diff --git a/demos/llm_routing/README.md b/demos/llm_routing/README.md index f5a49971..36214483 100644 --- a/demos/llm_routing/README.md +++ b/demos/llm_routing/README.md @@ -10,10 +10,18 @@ This demo shows how you can arch gateway to manage keys and route to appropricat 1. Navigate to http://localhost:18080/ # Observability -Arch gateway publishes stats endpoint at http://localhost:19901/stats. In this demo we are using prometheus to pull stats from arch and we are using grafana to visalize the stats in dashboard. To see grafana dashboard follow instructions below, +Arch gateway publishes stats endpoint at http://localhost:19901/stats. 
In this demo we are using prometheus to pull stats from arch and we are using grafana to visualize the stats in dashboard. To see grafana dashboard follow instructions below, 1. Navigate to http://localhost:3000/ to open grafana UI (use admin/grafana as credentials) 1. From grafana left nav click on dashboards and select "Intelligent Gateway Overview" to view arch gateway stats # Selecting different LLM -You can pick different LLM based on header `x-arch-llm-provider-hint` to override default LLM. +You can pick different LLM based on header `x-arch-llm-provider-hint` to override default LLM. Alternatively, you can use the chatbot UI to override the LLM. + +Here is a preview of the chat UI: + +![LLM Routing Demo](../../docs/source/_static/img/llm_routing_demo.png) + +You can also view traces in the Jaeger UI that this demo starts by heading over to http://localhost:16686/ + +![Jaeger Tracing](../../docs/source/_static/img/jaeger_tracing_llm_routing.png) diff --git a/demos/llm_routing/arch_config.yaml b/demos/llm_routing/arch_config.yaml index 620a1d10..f7ce78cd 100644 --- a/demos/llm_routing/arch_config.yaml +++ b/demos/llm_routing/arch_config.yaml @@ -9,23 +9,23 @@ listener: llm_providers: - name: gpt-4o-mini access_key: $OPENAI_API_KEY - provider: openai + provider_interface: openai model: gpt-4o-mini default: true - name: gpt-3.5-turbo-0125 access_key: $OPENAI_API_KEY - provider: openai + provider_interface: openai model: gpt-3.5-turbo-0125 - name: gpt-4o access_key: $OPENAI_API_KEY - provider: openai + provider_interface: openai model: gpt-4o - name: ministral-3b access_key: $MISTRAL_API_KEY - provider: mistral + provider_interface: mistral model: ministral-3b-latest tracing: diff --git a/demos/multi_turn_rag_agent/arch_config.yaml b/demos/multi_turn_rag_agent/arch_config.yaml index 0faffd6d..1399965f 100644 --- a/demos/multi_turn_rag_agent/arch_config.yaml +++ b/demos/multi_turn_rag_agent/arch_config.yaml @@ -14,7 +14,7 @@ endpoints: llm_providers: - name: gpt-4o-mini access_key: $OPENAI_API_KEY - 
provider: openai + provider_interface: openai model: gpt-4o-mini default: true diff --git a/demos/network_agent/arch_config.yaml b/demos/network_agent/arch_config.yaml index f46dd204..ad3bfae5 100644 --- a/demos/network_agent/arch_config.yaml +++ b/demos/network_agent/arch_config.yaml @@ -7,7 +7,7 @@ listener: # Centralized way to manage LLMs, manage keys, retry logic, failover and limits in a central way llm_providers: - name: OpenAI - provider: openai + provider_interface: openai access_key: $OPENAI_API_KEY model: gpt-3.5-turbo default: true diff --git a/demos/weather_forecast/arch_config.yaml b/demos/weather_forecast/arch_config.yaml index b2c11df5..94a6bdfb 100644 --- a/demos/weather_forecast/arch_config.yaml +++ b/demos/weather_forecast/arch_config.yaml @@ -18,18 +18,18 @@ overrides: llm_providers: - name: gpt-4o-mini access_key: $OPENAI_API_KEY - provider: openai + provider_interface: openai model: gpt-4o-mini default: true - name: gpt-3.5-turbo-0125 access_key: $OPENAI_API_KEY - provider: openai + provider_interface: openai model: gpt-3.5-turbo-0125 - name: gpt-4o access_key: $OPENAI_API_KEY - provider: openai + provider_interface: openai model: gpt-4o system_prompt: | diff --git a/docs/source/_static/img/jaeger_tracing_llm_routing.png b/docs/source/_static/img/jaeger_tracing_llm_routing.png new file mode 100644 index 00000000..e18016d1 Binary files /dev/null and b/docs/source/_static/img/jaeger_tracing_llm_routing.png differ diff --git a/docs/source/_static/img/llm_routing_demo.png b/docs/source/_static/img/llm_routing_demo.png new file mode 100644 index 00000000..50f25677 Binary files /dev/null and b/docs/source/_static/img/llm_routing_demo.png differ diff --git a/docs/source/concepts/includes/arch_config.yaml b/docs/source/concepts/includes/arch_config.yaml index 44da39ff..c78f35f7 100644 --- a/docs/source/concepts/includes/arch_config.yaml +++ b/docs/source/concepts/includes/arch_config.yaml @@ -9,7 +9,7 @@ listener: # Centralized way to manage LLMs, 
manage keys, retry logic, failover and limits in a central way llm_providers: - name: OpenAI - provider: openai + provider_interface: openai access_key: $OPENAI_API_KEY model: gpt-4o default: true diff --git a/docs/source/guides/includes/arch_config.yaml b/docs/source/guides/includes/arch_config.yaml index 73fdff00..415c74aa 100644 --- a/docs/source/guides/includes/arch_config.yaml +++ b/docs/source/guides/includes/arch_config.yaml @@ -9,7 +9,7 @@ listener: # Centralized way to manage LLMs, manage keys, retry logic, failover and limits in a central way llm_providers: - name: OpenAI - provider: openai + provider_interface: openai access_key: $OPENAI_API_KEY model: gpt-4o default: true diff --git a/tests/archgw/arch_config.yaml b/tests/archgw/arch_config.yaml index 3f450717..2c3d85d5 100644 --- a/tests/archgw/arch_config.yaml +++ b/tests/archgw/arch_config.yaml @@ -14,18 +14,18 @@ endpoints: llm_providers: - name: gpt-4o-mini access_key: $OPENAI_API_KEY - provider: openai + provider_interface: openai model: gpt-4o-mini default: true - name: gpt-3.5-turbo-0125 access_key: $OPENAI_API_KEY - provider: openai + provider_interface: openai model: gpt-3.5-turbo-0125 - name: gpt-4o access_key: $OPENAI_API_KEY - provider: openai + provider_interface: openai model: gpt-4o system_prompt: |