more updates

2026-07-02 15:51:02 +02:00 · 2025-07-10 15:34:12 -07:00 · 2025-07-10 15:34:12 -07:00 · 7f90124bd1
commit 7f90124bd1
parent e7eb77383f
29 changed files with 375 additions and 133 deletions
--- a/demos/use_cases/preference_based_routing/arch_config.yaml
+++ b/demos/use_cases/preference_based_routing/arch_config.yaml
@ -9,22 +9,21 @@ listeners:

 llm_providers:

-  - access_key: $OPENAI_API_KEY
-    model: openai/gpt-4o-mini
-
-  - access_key: $OPENAI_API_KEY
-    model: openai/gpt-4.1
+  - model: openai/gpt-4o-mini  # the only provider in this list flagged default: true
+    access_key: $OPENAI_API_KEY
    default: true

-  - name: code_generation
+  - model: openai/gpt-4o  # NOTE(review): "code understanding" was served by gpt-4o-mini before this change - confirm the move is intended
    access_key: $OPENAI_API_KEY
-    model: openai/gpt-4.1
-    usage: generating new code snippets, functions, or boilerplate based on user prompts or requirements
+    routing_preferences:  # name/description pairs; supersede the removed flat name + usage keys
+      - name: code understanding
+        description: understand and explain existing code snippets, functions, or libraries

-  - name: code_understanding
+  - model: openai/gpt-4.1
    access_key: $OPENAI_API_KEY
-    model: openai/gpt-4o-mini
-    usage: understand and explain existing code snippets, functions, or libraries
+    routing_preferences:  # same shape as the gpt-4o entry above: one named preference per model
+      - name: code generation
+        description: generating new code snippets, functions, or boilerplate based on user prompts or requirements

 tracing:
  random_sampling: 100
--- a/demos/use_cases/preference_based_routing/arch_config_local.yaml
+++ b/demos/use_cases/preference_based_routing/arch_config_local.yaml
@ -1,45 +0,0 @@
-version: v0.1.0
-
-routing:
-  model: Arch-Router
-  llm_provider: arch-router
-
-listeners:
-  egress_traffic:
-    address: 0.0.0.0
-    port: 12000
-    message_format: openai
-    timeout: 30s
-
-llm_providers:
-
-  - name: arch-router
-    provider_interface: arch
-    model: hf.co/katanemo/Arch-Router-1.5B.gguf:Q4_K_M
-    endpoint: host.docker.internal:11434
-
-  - name: gpt-4o-mini
-    provider_interface: openai
-    access_key: $OPENAI_API_KEY
-    model: gpt-4o-mini
-
-  - name: gpt-4.1
-    provider_interface: openai
-    access_key: $OPENAI_API_KEY
-    model: gpt-4.1
-    default: true
-
-  - name: code_generation
-    access_key: $OPENAI_API_KEY
-    provider_interface: openai
-    model: gpt-4.1
-    usage: generating new code snippets, functions, or boilerplate based on user prompts or requirements
-
-  - name: code_understanding
-    provider_interface: openai
-    access_key: $OPENAI_API_KEY
-    model: gpt-4.1
-    usage: understand and explain existing code snippets, functions, or libraries
-
-tracing:
-  random_sampling: 100
--- a/demos/use_cases/preference_based_routing/arch_config_rendered.yaml
+++ b/demos/use_cases/preference_based_routing/arch_config_rendered.yaml
@ -0,0 +1,29 @@
+listeners:
+  egress_traffic:
+    address: 0.0.0.0
+    message_format: openai
+    port: 12000
+    timeout: 30s
+llm_providers:  # presumably rendered from arch_config.yaml - keep routing_preferences in sync with it
+- access_key: $OPENAI_API_KEY
+  default: true
+  model: gpt-4o-mini
+  name: openai/gpt-4o-mini
+  provider_interface: openai
+- access_key: $OPENAI_API_KEY
+  model: gpt-4o
+  name: openai/gpt-4o
+  provider_interface: openai
+  routing_preferences:
+  - description: understand and explain existing code snippets, functions, or libraries
+    name: code understanding
+- access_key: $OPENAI_API_KEY
+  model: gpt-4.1
+  name: openai/gpt-4.1
+  provider_interface: openai
+  routing_preferences:
+  - description: generating new code snippets, functions, or boilerplate based on user prompts or requirements
+    name: code generation
+tracing:
+  random_sampling: 100
+version: v0.1.0
--- a/demos/use_cases/preference_based_routing/hurl_tests/simple.hurl
+++ b/demos/use_cases/preference_based_routing/hurl_tests/simple.hurl
@ -2,18 +2,18 @@ POST http://localhost:12000/v1/chat/completions
 Content-Type: application/json

 {
+  "model": "openai/gpt-4.1",
  "messages": [
    {
      "role": "user",
      "content": "hi"
    }
-  ],
-  "model": "none"
+  ]
 }
 HTTP 200
 [Asserts]
 header "content-type" == "application/json"
-jsonpath "$.model" matches /^gpt-4.1/
+jsonpath "$.model" matches /^gpt-4o-mini/  # NOTE(review): request pins "openai/gpt-4.1" but the default provider's model is expected here - confirm intended
 jsonpath "$.usage" != null
 jsonpath "$.choices[0].message.content" != null
 jsonpath "$.choices[0].message.role" == "assistant"