Merge branch 'main' into adil/release_0.2.1

2026-06-23 15:38:07 +02:00 · 2025-02-07 19:14:31 -08:00 · 2025-02-07 19:14:31 -08:00 · dff8e1086d
commit dff8e1086d
parent 7b8e702ea7 8de6eacfbd
11 changed files with 265 additions and 8 deletions
--- a/arch/arch_config_schema.yaml
+++ b/arch/arch_config_schema.yaml
@ -79,6 +79,8 @@ properties:
    properties:
      prompt_target_intent_matching_threshold:
        type: number
      optimize_context_window:
        type: boolean
  system_prompt:
    type: string
  prompt_targets:
--- a/crates/common/src/configuration.rs
+++ b/crates/common/src/configuration.rs
@ -25,6 +25,7 @@ pub struct Configuration {
 /// Optional behavior overrides, mirroring the `overrides` properties of the
 /// Arch config schema. Every field is optional; `Default` yields all `None`.
 #[derive(Debug, Clone, Serialize, Deserialize, Default)]
 pub struct Overrides {
    /// Threshold used for prompt-target intent matching.
    /// NOTE(review): the consumer is not visible here — presumably a score
    /// cutoff; confirm range and semantics against the caller.
    pub prompt_target_intent_matching_threshold: Option<f64>,
    /// When `Some(true)`, the prompt gateway adds the entry
    /// `"optimize_context_window": "true"` to the metadata of the chat
    /// completion request it builds; `None` is treated the same as `false`.
    pub optimize_context_window: Option<bool>,
 }
 #[derive(Debug, Clone, Serialize, Deserialize, Default)]
--- a/crates/prompt_gateway/src/http_context.rs
+++ b/crates/prompt_gateway/src/http_context.rs
@ -137,9 +137,20 @@ impl HttpContext for StreamContext {
            .map(|(_, pt)| pt.into())
            .collect();
        let mut metadata = deserialized_body.metadata.clone();
        if let Some(overrides) = self.overrides.as_ref() {
            if overrides.optimize_context_window.unwrap_or_default() {
                if metadata.is_none() {
                    metadata = Some(HashMap::new());
                }
                metadata.as_mut().unwrap().insert("optimize_context_window".to_string(), "true".to_string());
            }
        }
        let arch_fc_chat_completion_request = ChatCompletionsRequest {
            messages: deserialized_body.messages.clone(),
-            metadata: deserialized_body.metadata.clone(),
+            metadata,
            stream: deserialized_body.stream,
            model: "--".to_string(),
            stream_options: deserialized_body.stream_options.clone(),
--- a/crates/prompt_gateway/src/stream_context.rs
+++ b/crates/prompt_gateway/src/stream_context.rs
@ -46,7 +46,7 @@ pub struct StreamCallContext {
 pub struct StreamContext {
    system_prompt: Rc<Option<String>>,
    pub prompt_targets: Rc<HashMap<String, PromptTarget>>,
-    _overrides: Rc<Option<Overrides>>,
+    pub overrides: Rc<Option<Overrides>>,
    pub metrics: Rc<Metrics>,
    pub callouts: RefCell<HashMap<u32, StreamCallContext>>,
    pub context_id: u32,
@ -89,7 +89,7 @@ impl StreamContext {
            streaming_response: false,
            user_prompt: None,
            is_chat_completions_request: false,
-            _overrides: overrides,
+            overrides: overrides,
            request_id: None,
            traceparent: None,
            _tracing: tracing,
--- a/demos/use_cases/spotify_bearer_auth/README.md
+++ b/demos/use_cases/spotify_bearer_auth/README.md
@ -0,0 +1,11 @@
 This demo shows how to call Spotify's public APIs using a bearer token passed in the HTTP Authorization header.
 This demo uses the following Spotify APIs:
 - /v1/browse/new-releases
 - /v1/artists/{artist_id}/top-tracks
 For more details please see arch_config.yaml.
 To get an auth token, please follow these steps:
 TODO
--- a/demos/use_cases/spotify_bearer_auth/arch_config.yaml
+++ b/demos/use_cases/spotify_bearer_auth/arch_config.yaml
@ -0,0 +1,155 @@
 version: v0.1
 listener:
  address: 127.0.0.1
  port: 8080 #If you configure port 443, you'll need to update the listener with tls_certificates
  message_format: huggingface
 overrides:
  optimize_context_window: true
 endpoints:
  spotify:
    endpoint: api.spotify.com
    protocol: https
 system_prompt: |
  I have the following JSON data representing a list of albums from Spotify:
  {
  "items": [
    {
      "album_type": "album",
      "artists": [
        {
          "external_urls": {
            "spotify": "https://open.spotify.com/artist/06HL4z0CvFAxyc27GXpf02"
          },
          "href": "https://api.spotify.com/v1/artists/06HL4z0CvFAxyc27GXpf02",
          "id": "06HL4z0CvFAxyc27GXpf02",
          "name": "Taylor Swift",
          "type": "artist",
          "uri": "spotify:artist:06HL4z0CvFAxyc27GXpf02"
        }
      ],
      "available_markets": [ /* ... markets omitted for brevity ... */ ],
      "external_urls": {
        "spotify": "https://open.spotify.com/album/1Mo4aZ8pdj6L1jx8zSwJnt"
      },
      "href": "https://api.spotify.com/v1/albums/1Mo4aZ8pdj6L1jx8zSwJnt",
      "id": "1Mo4aZ8pdj6L1jx8zSwJnt",
      "images": [
        {
          "height": 300,
          "url": "https://i.scdn.co/image/ab67616d00001e025076e4160d018e378f488c33",
          "width": 300
        },
        {
          "height": 64,
          "url": "https://i.scdn.co/image/ab67616d000048515076e4160d018e378f488c33",
          "width": 64
        },
        {
          "height": 640,
          "url": "https://i.scdn.co/image/ab67616d0000b2735076e4160d018e378f488c33",
          "width": 640
        }
      ],
      "name": "THE TORTURED POETS DEPARTMENT",
      "release_date": "2024-04-18",
      "release_date_precision": "day",
      "total_tracks": 16,
      "type": "album",
      "uri": "spotify:album:1Mo4aZ8pdj6L1jx8zSwJnt"
    }
  ]
  }
  Please convert this JSON into Markdown with the following layout for each album:
  - Display the album image (using Markdown image syntax) first.
  - On the next line immediately after the image, display the album title, artist name (use the first artist listed), and the release date, all separated by a hyphen or another clear delimiter.
  - On the next line, provide the Spotify link (using Markdown link syntax).
  For example, the output should look similar to this (using the data above):
  ![Album Image](https://i.scdn.co/image/ab67616d00001e025076e4160d018e378f488c33)
  **THE TORTURED POETS DEPARTMENT**
  Taylor Swift - 2024-04-18
  [Listen on Spotify](https://open.spotify.com/album/1Mo4aZ8pdj6L1jx8zSwJnt)
  Artist Id: 06HL4z0CvFAxyc27GXpf02
  <hr>
  Make sure your output is valid Markdown. And don't say "formatted in Markdown". Thanks!
 llm_providers:
  - name: OpenAI
    provider_interface: openai
    access_key: $OPENAI_API_KEY
    model: gpt-4o
    default: true
 prompt_targets:
  - name: get_new_releases
    description: Get a list of new album releases featured in Spotify (shown, for example, on a Spotify player’s “Browse” tab).
    parameters:
      - name: country
        description: the country where the album is released
        required: true
        type: str
        in_path: true
      - name: limit
        type: integer
        description: The maximum number of results to return
        default: "5"
    endpoint:
      name: spotify
      path: /v1/browse/new-releases
      http_headers:
        Authorization: "Bearer $SPOTIFY_CLIENT_KEY"
  - name: get_artist_top_tracks
    description: Get information about an artist's top tracks
    parameters:
      - name: artist_id
        description: The ID of the artist.
        required: true
        type: str
        in_path: true
    endpoint:
      name: spotify
      path: /v1/artists/{artist_id}/top-tracks
      http_headers:
        Authorization: "Bearer $SPOTIFY_CLIENT_KEY"
  # - name: search_for_item
  #   description: Get information about albums, artists, playlists, tracks, shows, episodes or audiobooks. You can search for an item by its name, creator, or topic.
  #   parameters:
  #     - name: q
  #       description: Your search query, which can include keywords related to the item name, its creator, or its topic.
  #       required: true
  #       type: str
  #     - name: type
  #       type: str
  #       description: The type of the item to search for (e.g., album, artist, playlist, track, show, episode, audiobook).
  #       enum:
  #         - album
  #         - artist
  #         - playlist
  #         - track
  #         - show
  #         - episode
  #         - audiobook
  #       required: true
  #     - name: market
  #       type: str
  #       description: A country code
  #       default: US
  #     - name: limit
  #       type: integer
  #       description: The maximum number of results to return
  #       default: "5"
  #   endpoint:
  #     name: spotify
  #     path: /v1/search
  #     http_headers:
  #       Authorization: "Bearer $SPOTIFY_CLIENT_KEY"
--- a/demos/use_cases/spotify_bearer_auth/docker-compose.yaml
+++ b/demos/use_cases/spotify_bearer_auth/docker-compose.yaml
@ -0,0 +1,21 @@
 services:
  chatbot_ui:
    build:
      context: ../shared/chatbot_ui
    ports:
      - "18080:8080"
    environment:
      # this is only because we are running the sample app in the same docker container environment as archgw
      - CHAT_COMPLETION_ENDPOINT=http://host.docker.internal:10000/v1
    extra_hosts:
      - "host.docker.internal:host-gateway"
    volumes:
      - ./arch_config.yaml:/app/arch_config.yaml
  jaeger:
    build:
      context: ../shared/jaeger
    ports:
      - "16686:16686"
      - "4317:4317"
      - "4318:4318"
--- a/demos/use_cases/spotify_bearer_auth/run_demo.sh
+++ b/demos/use_cases/spotify_bearer_auth/run_demo.sh
@ -0,0 +1,47 @@
 #!/bin/bash
 set -e
 # Start the demo: make sure a .env file holding the OpenAI key exists, bring up
 # Arch with arch_config.yaml, then start the Docker Compose services.
 # The script runs under `set -e`, so any failing command below aborts the run
 # before the later steps execute.
 start_demo() {
  # Step 1: Reuse an existing .env file as-is (its contents are not checked)
  if [ -f ".env" ]; then
    echo ".env file already exists. Skipping creation."
  else
    # Step 2: Create `.env` from the caller's environment; OPENAI_API_KEY is mandatory
    if [ -z "$OPENAI_API_KEY" ]; then
      echo "Error: OPENAI_API_KEY environment variable is not set for the demo."
      exit 1
    fi
    echo "Creating .env file..."
    # NOTE(review): only OPENAI_API_KEY is written here, but arch_config.yaml also
    # references $SPOTIFY_CLIENT_KEY in its Authorization headers — confirm how
    # that key is expected to reach Arch.
    echo "OPENAI_API_KEY=$OPENAI_API_KEY" > .env
    echo ".env file created with OPENAI_API_KEY."
  fi
  # Step 3: Start Arch using the demo's config file in the current directory
  echo "Starting Arch with arch_config.yaml..."
  archgw up arch_config.yaml
  # Step 4: Start the services defined in docker-compose.yaml (chatbot_ui, jaeger)
  echo "Starting Network Agent using Docker Compose..."
  docker compose up -d  # Run in detached mode
 }
 # Stop the demo, tearing things down in the reverse order of start_demo:
 # Docker Compose services first, then Arch.
 # Under `set -e`, a failing `docker compose down` aborts before `archgw down` runs.
 stop_demo() {
  # Step 1: Stop and remove the Docker Compose services
  echo "Stopping Network Agent using Docker Compose..."
  docker compose down
  # Step 2: Stop Arch
  echo "Stopping Arch..."
  archgw down
 }
 # Main script logic: `run_demo.sh down` stops the demo; any other first
 # argument (or none) starts it.
 if [ "$1" == "down" ]; then
  stop_demo
 else
  # Default action is to bring the demo up
  start_demo
 fi
--- a/model_server/src/core/function_calling.py
+++ b/model_server/src/core/function_calling.py
@ -134,7 +134,7 @@ class ArchIntentHandler(ArchBaseHandler):
                req.messages, req.tools, self.extra_instruction
            )
-            logger.info(f"[request]: {json.dumps(messages)}")
+            logger.info(f"[request to arch-fc (intent)]: {json.dumps(messages)}")
            model_response = self.client.chat.completions.create(
                messages=messages,
@ -519,9 +519,11 @@ class ArchFunctionHandler(ArchBaseHandler):
        """
        logger.info("[Arch-Function] - ChatCompletion")
-        messages = self._process_messages(req.messages, req.tools)
+        messages = self._process_messages(
            req.messages, req.tools, metadata=req.metadata
        )
-        logger.info(f"[request]: {json.dumps(messages)}")
+        logger.info(f"[request to arch-fc]: {json.dumps(messages)}")
        # always enable `stream=True` to collect model responses
        response = self.client.chat.completions.create(
--- a/model_server/src/core/guardrails.py
+++ b/model_server/src/core/guardrails.py
@ -105,7 +105,7 @@ class ArchGuardHanlder:
            raise NotImplementedError(f"{req.task} is not supported!")
        logger.info("[Arch-Guard] - Prediction")
-        logger.info(f"[request]: {req.input}")
+        logger.info(f"[request arch-guard]: {req.input}")
        if len(req.input.split()) < max_num_words:
            result = self._predict_text(req.task, req.input)
--- a/model_server/src/core/utils/model_utils.py
+++ b/model_server/src/core/utils/model_utils.py
@ -16,6 +16,7 @@ class Message(BaseModel):
 # Request model carrying the conversation messages, the tool definitions, and
 # optional string-to-string metadata.
 class ChatMessage(BaseModel):
    # Conversation turns; defaults to an empty list.
    messages: List[Message] = []
    # Tool definitions as free-form dicts; defaults to an empty list.
    tools: List[Dict[str, Any]] = []
    # String key/value flags. ArchBaseHandler._process_messages reads the
    # "optimize_context_window" key from the metadata it is given.
    # NOTE(review): None is accepted here, but _process_messages calls
    # metadata.get(...) for tool messages — verify a null metadata cannot reach it.
    metadata: Optional[Dict[str, str]] = {}
 class Choice(BaseModel):
@ -123,6 +124,7 @@ class ArchBaseHandler:
        tools: List[Dict[str, Any]] = None,
        extra_instruction: str = None,
        max_tokens=4096,
        metadata: Dict[str, str] = {},
    ):
        """
        Processes a list of messages and formats them appropriately.
@ -157,7 +159,12 @@ class ArchBaseHandler:
                content = f"<tool_call>\n{json.dumps(tool_calls[0]['function'])}\n</tool_call>"
            elif role == "tool":
                role = "user"
-                content = f"<tool_response>\n{json.dumps(content)}\n</tool_response>"
+                if metadata.get("optimize_context_window", "false").lower() == "true":
                    content = f"<tool_response>\n\n</tool_response>"
                else:
                    content = (
                        f"<tool_response>\n{json.dumps(content)}\n</tool_response>"
                    )
            processed_messages.append({"role": role, "content": content})