diff --git a/demos/use_cases/llm_routing/arch_config.yaml b/demos/use_cases/llm_routing/arch_config.yaml index 289d8bf2..11087d3a 100644 --- a/demos/use_cases/llm_routing/arch_config.yaml +++ b/demos/use_cases/llm_routing/arch_config.yaml @@ -30,5 +30,11 @@ llm_providers: model: deepseek-reasoner base_url: https://api.deepseek.com/ + - name: groq + access_key: $GROQ_API_KEY + provider_interface: openai + model: llama-3.1-8b-instant + base_url: https://api.groq.com + tracing: random_sampling: 100 diff --git a/model_server/src/commons/globals.py b/model_server/src/commons/globals.py index e88ecaaa..a39be7c3 100644 --- a/model_server/src/commons/globals.py +++ b/model_server/src/commons/globals.py @@ -16,6 +16,8 @@ logger = get_model_server_logger() # Define the client # ARCH_ENDPOINT = os.getenv("ARCH_ENDPOINT", "https://archfc.katanemo.dev/v1") +# use temporary endpoint until we deprecate archfc-v1.0 from archfc.katanemo.dev +# and officially release archfc-v1.1 on archfc.katanemo.dev ARCH_ENDPOINT = os.getenv("ARCH_ENDPOINT", "http://34.72.123.163:8000/v1") ARCH_API_KEY = "EMPTY" ARCH_CLIENT = OpenAI(base_url=ARCH_ENDPOINT, api_key=ARCH_API_KEY)