Add support for streaming and fix a few issues (see description) (#202)

2026-05-11 00:32:42 +02:00 · 2024-10-28 20:05:06 -04:00 · 2024-10-28 20:05:06 -04:00 · 662a840ac5
commit 662a840ac5
parent 29ff8da60f
45 changed files with 2266 additions and 477 deletions
--- a/demos/function_calling/api_server/app/main.py
+++ b/demos/function_calling/api_server/app/main.py
@ -66,18 +66,18 @@ async def insurance_claim_details(req: InsuranceClaimDetailsRequest, res: Respon


 class DefaultTargetRequest(BaseModel):
-    arch_messages: list
+    messages: list


@app.post("/default_target")
 async def default_target(req: DefaultTargetRequest, res: Response):
-    logger.info(f"Received arch_messages: {req.arch_messages}")
+    logger.info(f"Received arch_messages: {req.messages}")
    resp = {
        "choices": [
            {
                "message": {
                    "role": "assistant",
-                    "content": "hello world from api server",
+                    "content": "I can help you with weather forecast or insurance claim details",
                },
                "finish_reason": "completed",
                "index": 0,
--- a/demos/function_calling/arch_config.yaml
+++ b/demos/function_calling/arch_config.yaml
@ -16,12 +16,27 @@ overrides:
  prompt_target_intent_matching_threshold: 0.6

 llm_providers:
-  - name: gpt
-    access_key: OPENAI_API_KEY
+  - name: gpt-4o-mini
+    access_key: $OPENAI_API_KEY
    provider: openai
-    model: gpt-3.5-turbo
+    model: gpt-4o-mini
    default: true

+  - name: gpt-3.5-turbo-0125
+    access_key: $OPENAI_API_KEY
+    provider: openai
+    model: gpt-3.5-turbo-0125
+
+  - name: gpt-4o
+    access_key: $OPENAI_API_KEY
+    provider: openai
+    model: gpt-4o
+
+  - name: ministral-3b
+    access_key: $MISTRAL_API_KEY
+    provider: mistral
+    model: ministral-3b-latest
+
 system_prompt: |
  You are a helpful assistant.

@ -67,10 +82,10 @@ prompt_targets:
      name: api_server
      path: /default_target
    system_prompt: |
-      You are a helpful assistant. Use the information that is provided to you.
+      You are a helpful assistant! Summarize the user's request and provide a helpful response.
    # if it is set to false arch will send response that it received from this prompt target to the user
    # if true arch will forward the response to the default LLM
-    auto_llm_dispatch_on_response: true
+    auto_llm_dispatch_on_response: false

 tracing:
  random_sampling: 100
--- a/demos/function_calling/docker-compose.yaml
+++ b/demos/function_calling/docker-compose.yaml
@ -13,11 +13,11 @@ services:
  chatbot_ui:
    build:
      context: ../../chatbot_ui
-      dockerfile: Dockerfile
    ports:
      - "18080:8080"
    environment:
-      - CHAT_COMPLETION_ENDPOINT=http://host.docker.internal:10000/v1 #this is only because we are running the sample app in the same docker container environemtn as archgw
+      # this is only because we are running the sample app in the same docker container environment as archgw
+      - CHAT_COMPLETION_ENDPOINT=http://host.docker.internal:10000/v1
    extra_hosts:
      - "host.docker.internal:host-gateway"
    volumes:
@ -38,6 +38,8 @@ services:
    - "${PORT_UI:-55679}:55679"
    - "${PORT_GRPC:-4317}:4317"
    - "${PORT_HTTP:-4318}:4318"
+    profiles:
+      - monitoring

  prometheus:
    image: prom/prometheus