diff --git a/arch/arch_config_schema.yaml b/arch/arch_config_schema.yaml
index f2b2c8a5..1b32b730 100644
--- a/arch/arch_config_schema.yaml
+++ b/arch/arch_config_schema.yaml
@@ -79,6 +79,8 @@ properties:
     properties:
       prompt_target_intent_matching_threshold:
         type: number
+      optimize_context_window:
+        type: boolean
   system_prompt:
     type: string
   prompt_targets:
diff --git a/crates/common/src/configuration.rs b/crates/common/src/configuration.rs
index f1250499..069695ba 100644
--- a/crates/common/src/configuration.rs
+++ b/crates/common/src/configuration.rs
@@ -25,6 +25,7 @@ pub struct Configuration {
 #[derive(Debug, Clone, Serialize, Deserialize, Default)]
 pub struct Overrides {
     pub prompt_target_intent_matching_threshold: Option<f64>,
+    pub optimize_context_window: Option<bool>,
 }
 
 #[derive(Debug, Clone, Serialize, Deserialize, Default)]
diff --git a/crates/prompt_gateway/src/http_context.rs b/crates/prompt_gateway/src/http_context.rs
index e7d920f1..1ff7f91d 100644
--- a/crates/prompt_gateway/src/http_context.rs
+++ b/crates/prompt_gateway/src/http_context.rs
@@ -137,9 +137,20 @@ impl HttpContext for StreamContext {
             .map(|(_, pt)| pt.into())
             .collect();
 
+        let mut metadata = deserialized_body.metadata.clone();
+
+        if let Some(overrides) = self.overrides.as_ref() {
+            if overrides.optimize_context_window.unwrap_or_default() {
+                if metadata.is_none() {
+                    metadata = Some(HashMap::new());
+                }
+                metadata.as_mut().unwrap().insert("optimize_context_window".to_string(), "true".to_string());
+            }
+        }
+
         let arch_fc_chat_completion_request = ChatCompletionsRequest {
             messages: deserialized_body.messages.clone(),
-            metadata: deserialized_body.metadata.clone(),
+            metadata,
             stream: deserialized_body.stream,
             model: "--".to_string(),
             stream_options: deserialized_body.stream_options.clone(),
diff --git a/crates/prompt_gateway/src/stream_context.rs b/crates/prompt_gateway/src/stream_context.rs
index 2704e8d8..e6db7f59 100644
--- a/crates/prompt_gateway/src/stream_context.rs
+++ b/crates/prompt_gateway/src/stream_context.rs
@@ -46,7 +46,7 @@ pub struct StreamCallContext {
 pub struct StreamContext {
     system_prompt: Rc<Option<String>>,
     pub prompt_targets: Rc<HashMap<String, PromptTarget>>,
-    _overrides: Rc<Option<Overrides>>,
+    pub overrides: Rc<Option<Overrides>>,
     pub metrics: Rc<Metrics>,
     pub callouts: RefCell<HashMap<u32, StreamCallContext>>,
     pub context_id: u32,
@@ -89,7 +89,7 @@ impl StreamContext {
             streaming_response: false,
             user_prompt: None,
             is_chat_completions_request: false,
-            _overrides: overrides,
+            overrides,
             request_id: None,
             traceparent: None,
             _tracing: tracing,
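In plain terms, the gateway changes above stamp the `optimize_context_window` override into the request metadata that is forwarded to the model server. A minimal sketch of that logic, with plain Python dicts standing in for the gateway's Rust types (the function and argument names here are illustrative only, not part of the codebase):

```python
def tag_metadata(overrides, metadata):
    # Mirrors http_context.rs: when the override is enabled, stamp the
    # flag into the request metadata so the model server sees it downstream.
    if (overrides or {}).get("optimize_context_window"):
        metadata = dict(metadata or {})
        metadata["optimize_context_window"] = "true"
    return metadata

assert tag_metadata({"optimize_context_window": True}, None) == {"optimize_context_window": "true"}
assert tag_metadata(None, {"k": "v"}) == {"k": "v"}  # no override: metadata passes through untouched
```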
diff --git a/demos/use_cases/spotify_bearer_auth/README.md b/demos/use_cases/spotify_bearer_auth/README.md
new file mode 100644
index 00000000..4f04c16f
--- /dev/null
+++ b/demos/use_cases/spotify_bearer_auth/README.md
@@ -0,0 +1,11 @@
+This demo shows how you can call Spotify's public APIs using a bearer token in the HTTP Authorization header.
+
+This demo uses the following Spotify APIs:
+
+- /v1/browse/new-releases
+- /v1/artists/{artist_id}/top-tracks
+
+For more details, please see arch_config.yaml.
+
+To get an auth token, follow this:
+TODO
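The README's TODO is left open in this PR. Until it is filled in, Spotify's standard client-credentials flow (its documented public token endpoint) can mint a token; the environment variable names below are just examples:

```python
import os
import requests

# Spotify client-credentials flow against the documented public endpoint.
# SPOTIFY_CLIENT_ID / SPOTIFY_CLIENT_SECRET come from the Spotify developer dashboard.
resp = requests.post(
    "https://accounts.spotify.com/api/token",
    data={
        "grant_type": "client_credentials",
        "client_id": os.environ["SPOTIFY_CLIENT_ID"],
        "client_secret": os.environ["SPOTIFY_CLIENT_SECRET"],
    },
    timeout=30,
)
resp.raise_for_status()
print(resp.json()["access_token"])  # export this as SPOTIFY_CLIENT_KEY before running the demo
```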
diff --git a/demos/use_cases/spotify_bearer_auth/arch_config.yaml b/demos/use_cases/spotify_bearer_auth/arch_config.yaml
new file mode 100644
index 00000000..dbed4c8c
--- /dev/null
+++ b/demos/use_cases/spotify_bearer_auth/arch_config.yaml
@@ -0,0 +1,155 @@
+version: v0.1
+listener:
+  address: 127.0.0.1
+  port: 8080 # If you configure port 443, you'll need to update the listener with tls_certificates
+  message_format: huggingface
+
+overrides:
+  optimize_context_window: true
+
+endpoints:
+  spotify:
+    endpoint: api.spotify.com
+    protocol: https
+
+system_prompt: |
+  I have the following JSON data representing a list of albums from Spotify:
+
+  {
+    "items": [
+      {
+        "album_type": "album",
+        "artists": [
+          {
+            "external_urls": {
+              "spotify": "https://open.spotify.com/artist/06HL4z0CvFAxyc27GXpf02"
+            },
+            "href": "https://api.spotify.com/v1/artists/06HL4z0CvFAxyc27GXpf02",
+            "id": "06HL4z0CvFAxyc27GXpf02",
+            "name": "Taylor Swift",
+            "type": "artist",
+            "uri": "spotify:artist:06HL4z0CvFAxyc27GXpf02"
+          }
+        ],
+        "available_markets": [ /* ... markets omitted for brevity ... */ ],
+        "external_urls": {
+          "spotify": "https://open.spotify.com/album/1Mo4aZ8pdj6L1jx8zSwJnt"
+        },
+        "href": "https://api.spotify.com/v1/albums/1Mo4aZ8pdj6L1jx8zSwJnt",
+        "id": "1Mo4aZ8pdj6L1jx8zSwJnt",
+        "images": [
+          {
+            "height": 300,
+            "url": "https://i.scdn.co/image/ab67616d00001e025076e4160d018e378f488c33",
+            "width": 300
+          },
+          {
+            "height": 64,
+            "url": "https://i.scdn.co/image/ab67616d000048515076e4160d018e378f488c33",
+            "width": 64
+          },
+          {
+            "height": 640,
+            "url": "https://i.scdn.co/image/ab67616d0000b2735076e4160d018e378f488c33",
+            "width": 640
+          }
+        ],
+        "name": "THE TORTURED POETS DEPARTMENT",
+        "release_date": "2024-04-18",
+        "release_date_precision": "day",
+        "total_tracks": 16,
+        "type": "album",
+        "uri": "spotify:album:1Mo4aZ8pdj6L1jx8zSwJnt"
+      }
+    ]
+  }
+
+  Please convert this JSON into Markdown with the following layout for each album:
+
+  - Display the album image (using Markdown image syntax) first.
+  - On the next line immediately after the image, display the album title, artist name (use the first artist listed), and the release date, all separated by a hyphen or another clear delimiter.
+  - On the next line, provide the Spotify link (using Markdown link syntax).
+
+  For example, the output should look similar to this (using the data above):
+
+  ![Album Image](https://i.scdn.co/image/ab67616d00001e025076e4160d018e378f488c33)
+  **THE TORTURED POETS DEPARTMENT**
+  Taylor Swift - 2024-04-18
+  [Listen on Spotify](https://open.spotify.com/album/1Mo4aZ8pdj6L1jx8zSwJnt)
+  Artist Id: 06HL4z0CvFAxyc27GXpf02
+
+  Make sure your output is valid Markdown, and don't say "formatted in Markdown".
+
+llm_providers:
+  - name: OpenAI
+    provider_interface: openai
+    access_key: $OPENAI_API_KEY
+    model: gpt-4o
+    default: true
+
+prompt_targets:
+  - name: get_new_releases
+    description: Get a list of new album releases featured in Spotify (shown, for example, on a Spotify player's "Browse" tab).
+    parameters:
+      - name: country
+        description: The country where the album is released.
+        required: true
+        type: str
+        in_path: true
+      - name: limit
+        type: integer
+        description: The maximum number of results to return.
+        default: "5"
+    endpoint:
+      name: spotify
+      path: /v1/browse/new-releases
+      http_headers:
+        Authorization: "Bearer $SPOTIFY_CLIENT_KEY"
+
+  - name: get_artist_top_tracks
+    description: Get information about an artist's top tracks.
+    parameters:
+      - name: artist_id
+        description: The ID of the artist.
+        required: true
+        type: str
+        in_path: true
+    endpoint:
+      name: spotify
+      path: /v1/artists/{artist_id}/top-tracks
+      http_headers:
+        Authorization: "Bearer $SPOTIFY_CLIENT_KEY"
+
+  # - name: search_for_item
+  #   description: Get information about albums, artists, playlists, tracks, shows, episodes or audiobooks. You can search for an item by its name, creator, or topic.
+  #   parameters:
+  #     - name: q
+  #       description: Your search query, which can include keywords related to the item name, its creator, or its topic.
+  #       required: true
+  #       type: str
+  #     - name: type
+  #       type: str
+  #       description: The type of item to search for (e.g., album, artist, playlist, track, show, episode, audiobook).
+  #       enum:
+  #         - album
+  #         - artist
+  #         - playlist
+  #         - track
+  #         - show
+  #         - episode
+  #         - audiobook
+  #       required: true
+  #     - name: market
+  #       type: str
+  #       description: A country code.
+  #       default: US
+  #     - name: limit
+  #       type: integer
+  #       description: The maximum number of results to return.
+  #       default: "5"
+  #   endpoint:
+  #     name: spotify
+  #     path: /v1/search
+  #     http_headers:
+  #       Authorization: "Bearer $SPOTIFY_CLIENT_KEY"
diff --git a/demos/use_cases/spotify_bearer_auth/docker-compose.yaml b/demos/use_cases/spotify_bearer_auth/docker-compose.yaml
new file mode 100644
index 00000000..32e52c40
--- /dev/null
+++ b/demos/use_cases/spotify_bearer_auth/docker-compose.yaml
@@ -0,0 +1,21 @@
+services:
+  chatbot_ui:
+    build:
+      context: ../shared/chatbot_ui
+    ports:
+      - "18080:8080"
+    environment:
+      # this is only needed because the sample app runs in the same Docker environment as archgw
+      - CHAT_COMPLETION_ENDPOINT=http://host.docker.internal:10000/v1
+    extra_hosts:
+      - "host.docker.internal:host-gateway"
+    volumes:
+      - ./arch_config.yaml:/app/arch_config.yaml
+
+  jaeger:
+    build:
+      context: ../shared/jaeger
+    ports:
+      - "16686:16686"
+      - "4317:4317"
+      - "4318:4318"
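Once the demo is up, the gateway can be exercised directly through its OpenAI-compatible API. A smoke-test sketch; the host and port follow CHAT_COMPLETION_ENDPOINT in docker-compose.yaml above, so adjust them if your listener config differs:

```python
import requests

# Send a prompt that should match the get_new_releases prompt target.
resp = requests.post(
    "http://127.0.0.1:10000/v1/chat/completions",
    json={
        "model": "gpt-4o",  # provider model per llm_providers above
        "messages": [
            {"role": "user", "content": "What are the new album releases in the US?"}
        ],
        "stream": False,
    },
    timeout=60,
)
print(resp.json()["choices"][0]["message"]["content"])
```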
diff --git a/demos/use_cases/spotify_bearer_auth/run_demo.sh b/demos/use_cases/spotify_bearer_auth/run_demo.sh
new file mode 100644
index 00000000..eb47dce6
--- /dev/null
+++ b/demos/use_cases/spotify_bearer_auth/run_demo.sh
@@ -0,0 +1,47 @@
+#!/bin/bash
+set -e
+
+# Function to start the demo
+start_demo() {
+    # Step 1: Check if .env file exists
+    if [ -f ".env" ]; then
+        echo ".env file already exists. Skipping creation."
+    else
+        # Step 2: Create `.env` file and set the OpenAI key
+        if [ -z "$OPENAI_API_KEY" ]; then
+            echo "Error: OPENAI_API_KEY environment variable is not set for the demo."
+            exit 1
+        fi
+
+        echo "Creating .env file..."
+        echo "OPENAI_API_KEY=$OPENAI_API_KEY" > .env
+        echo ".env file created with OPENAI_API_KEY."
+    fi
+
+    # Step 3: Start Arch
+    echo "Starting Arch with arch_config.yaml..."
+    archgw up arch_config.yaml
+
+    # Step 4: Start developer services
+    echo "Starting demo services using Docker Compose..."
+    docker compose up -d # Run in detached mode
+}
+
+# Function to stop the demo
+stop_demo() {
+    # Step 1: Stop Docker Compose services
+    echo "Stopping demo services using Docker Compose..."
+    docker compose down
+
+    # Step 2: Stop Arch
+    echo "Stopping Arch..."
+    archgw down
+}
+
+# Main script logic
+if [ "$1" == "down" ]; then
+    stop_demo
+else
+    # Default action is to bring the demo up
+    start_demo
+fi
diff --git a/model_server/src/core/function_calling.py b/model_server/src/core/function_calling.py
index 25c83818..99dd29ba 100644
--- a/model_server/src/core/function_calling.py
+++ b/model_server/src/core/function_calling.py
@@ -134,7 +134,7 @@ class ArchIntentHandler(ArchBaseHandler):
             req.messages, req.tools, self.extra_instruction
         )
 
-        logger.info(f"[request]: {json.dumps(messages)}")
+        logger.info(f"[request to arch-fc (intent)]: {json.dumps(messages)}")
 
         model_response = self.client.chat.completions.create(
             messages=messages,
@@ -519,9 +519,11 @@ class ArchFunctionHandler(ArchBaseHandler):
         """
         logger.info("[Arch-Function] - ChatCompletion")
 
-        messages = self._process_messages(req.messages, req.tools)
+        messages = self._process_messages(
+            req.messages, req.tools, metadata=req.metadata
+        )
 
-        logger.info(f"[request]: {json.dumps(messages)}")
+        logger.info(f"[request to arch-fc]: {json.dumps(messages)}")
 
         # always enable `stream=True` to collect model responses
         response = self.client.chat.completions.create(
diff --git a/model_server/src/core/guardrails.py b/model_server/src/core/guardrails.py
index 0d2f34fc..fae4e5ba 100644
--- a/model_server/src/core/guardrails.py
+++ b/model_server/src/core/guardrails.py
@@ -105,7 +105,7 @@ class ArchGuardHanlder:
             raise NotImplementedError(f"{req.task} is not supported!")
 
         logger.info("[Arch-Guard] - Prediction")
-        logger.info(f"[request]: {req.input}")
+        logger.info(f"[request to arch-guard]: {req.input}")
 
         if len(req.input.split()) < max_num_words:
             result = self._predict_text(req.task, req.input)
diff --git a/model_server/src/core/utils/model_utils.py b/model_server/src/core/utils/model_utils.py
index d971d115..7dc71acf 100644
--- a/model_server/src/core/utils/model_utils.py
+++ b/model_server/src/core/utils/model_utils.py
@@ -16,6 +16,7 @@ class Message(BaseModel):
 class ChatMessage(BaseModel):
     messages: List[Message] = []
     tools: List[Dict[str, Any]] = []
+    metadata: Optional[Dict[str, str]] = {}
 
 
 class Choice(BaseModel):
@@ -123,6 +124,7 @@ class ArchBaseHandler:
         tools: List[Dict[str, Any]] = None,
         extra_instruction: str = None,
         max_tokens=4096,
+        metadata: Optional[Dict[str, str]] = None,
     ):
         """
         Processes a list of messages and formats them appropriately.
@@ -157,7 +159,12 @@ class ArchBaseHandler:
                 content = f"<tool_call>\n{json.dumps(tool_calls[0]['function'])}\n</tool_call>"
             elif role == "tool":
                 role = "user"
-                content = f"<tool_response>\n{json.dumps(content)}\n</tool_response>"
+                if (metadata or {}).get("optimize_context_window", "false").lower() == "true":
+                    content = "<tool_response>\n\n</tool_response>"
+                else:
+                    content = (
+                        f"<tool_response>\n{json.dumps(content)}\n</tool_response>"
+                    )
 
             processed_messages.append({"role": role, "content": content})
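The net effect of the model_utils.py change: when the gateway stamps `optimize_context_window` into the request metadata, earlier tool responses are collapsed to empty `<tool_response>` tags before the prompt is sent to Arch-FC, saving prompt tokens. A standalone sketch of just that branch (the function name below is illustrative, not the handler's API):

```python
import json

def render_tool_message(content, metadata=None):
    # Mirrors the patched branch of ArchBaseHandler._process_messages:
    # with the flag set, prior tool output is dropped from the prompt.
    if (metadata or {}).get("optimize_context_window", "false").lower() == "true":
        return "<tool_response>\n\n</tool_response>"
    return f"<tool_response>\n{json.dumps(content)}\n</tool_response>"

full = render_tool_message({"albums": ["THE TORTURED POETS DEPARTMENT"]})
slim = render_tool_message({"albums": ["THE TORTURED POETS DEPARTMENT"]},
                           {"optimize_context_window": "true"})
assert "albums" in full and "albums" not in slim
```

Note the `(metadata or {})` guard: `ChatMessage.metadata` is `Optional`, so a client that sends `"metadata": null` would otherwise crash the `.get` call with the bare `metadata.get(...)` form.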