From 8b9f48ce9b3781b1b3231c614c42602ae03aa1cc Mon Sep 17 00:00:00 2001 From: Adil Hafeez Date: Tue, 15 Apr 2025 13:26:43 -0700 Subject: [PATCH] add comments for v1.1 archfc model endpoint --- demos/use_cases/llm_routing/arch_config.yaml | 6 ++++++ model_server/src/commons/globals.py | 2 ++ 2 files changed, 8 insertions(+) diff --git a/demos/use_cases/llm_routing/arch_config.yaml b/demos/use_cases/llm_routing/arch_config.yaml index 289d8bf2..11087d3a 100644 --- a/demos/use_cases/llm_routing/arch_config.yaml +++ b/demos/use_cases/llm_routing/arch_config.yaml @@ -30,5 +30,11 @@ llm_providers: model: deepseek-reasoner base_url: https://api.deepseek.com/ + - name: groq + access_key: $GROQ_API_KEY + provider_interface: openai + model: llama-3.1-8b-instant + base_url: https://api.groq.com + tracing: random_sampling: 100 diff --git a/model_server/src/commons/globals.py b/model_server/src/commons/globals.py index e88ecaaa..a39be7c3 100644 --- a/model_server/src/commons/globals.py +++ b/model_server/src/commons/globals.py @@ -16,6 +16,8 @@ logger = get_model_server_logger() # Define the client # ARCH_ENDPOINT = os.getenv("ARCH_ENDPOINT", "https://archfc.katanemo.dev/v1") +# use temporary endpoint until we deprecate archfc-v1.0 from archfc.katanemo.dev +# and officially release archfc-v1.1 on archfc.katanemo.dev ARCH_ENDPOINT = os.getenv("ARCH_ENDPOINT", "http://34.72.123.163:8000/v1") ARCH_API_KEY = "EMPTY" ARCH_CLIENT = OpenAI(base_url=ARCH_ENDPOINT, api_key=ARCH_API_KEY)