# Tool-call hallucination test cases: each entry pairs chat messages and tool
# definitions with an expected `hallucination` flag.
test_cases:
# Weather agent, single turn: one user message asking for a Seattle forecast,
# one tool (get_current_weather); expected metadata has hallucination: false.
# NOTE(review): nesting is reconstructed from key order because the original
# indentation was lost -- confirm `tools` sits under `input` and `required`
# sits under `parameters`.
- id: '[WEATHER AGENT] - single turn, single tool, prompt prefilling'
  input:
    messages:
    - role: 'user'
      content: 'what is the weather forecast for seattle?'
    tools:
    - type: 'function'
      function:
        name: 'get_current_weather'
        description: 'Get current weather at a location.'
        parameters:
          type: 'object'
          properties:
            location:
              type: 'string'
              description: 'The location to get the weather for'
              format: 'City, State'
            days:
              type: 'integer'
              description: 'The number of days for the request.'
          # Both parameters are mandatory for the tool call.
          required: ['location', 'days']
  expected:
  - type: 'metadata'
    hallucination: false
# Weather agent, single turn: the user message names Seattle but gives no
# number of days; expected metadata has hallucination: true.
# Parameter types use the JSON Schema names ("string"/"integer") so this case
# agrees with the first weather case in this file.
# NOTE(review): nesting is reconstructed from key order because the original
# indentation was lost -- confirm `tools` sits under `input` and `required`
# sits under `parameters`.
- id: "[WEATHER AGENT] - single turn, single tool, hallucination"
  input:
    messages:
    - role: "user"
      content: "what is the weather in Seattle in days?"
    tools:
    - type: "function"
      function:
        name: "get_current_weather"
        description: "Get current weather at a location."
        parameters:
          type: "object"
          properties:
            location:
              type: "string"
              description: "The location to get the weather for"
              format: "City, State"
            days:
              type: "integer"
              description: "the number of days for the request."
          required: ["location", "days"]
  expected:
  - type: "metadata"
    hallucination: true
# Weather agent, multi turn: five alternating user/assistant messages ending
# with the user supplying "5 days"; expected metadata has hallucination: false.
# Parameter types use the JSON Schema names ("string"/"integer") so this case
# agrees with the first weather case in this file.
# NOTE(review): the first user message mentions chicago while a later one
# answers "Seattle" -- confirm this mix is intentional.
# NOTE(review): nesting is reconstructed from key order because the original
# indentation was lost -- confirm `tools` sits under `input` and `required`
# sits under `parameters`.
- id: "[WEATHER AGENT] - multi turn, single tool, all params passed"
  input:
    messages:
    - role: "user"
      content: "how is the weather in chicago for next 5 days?"
    - role: "assistant"
      content: "Can you tell me your location and how many days you want?"
    - role: "user"
      content: "Seattle"
    - role: "assistant"
      content: "Can you please provide me the days for the weather forecast?"
    - role: "user"
      content: "5 days"
    tools:
    - type: "function"
      function:
        name: "get_current_weather"
        description: "Get current weather at a location."
        parameters:
          type: "object"
          properties:
            location:
              type: "string"
              description: "The location to get the weather for"
              format: "City, State"
            days:
              type: "integer"
              description: "the number of days for the request."
          required: ["location", "days"]
  expected:
  - type: "metadata"
    hallucination: false
# Weather agent, multi turn: the user first answers "Seattle", then corrects
# the location to los angeles with 5 days in the final message; expected
# metadata has hallucination: false.
# Parameter types use the JSON Schema names ("string"/"integer") so this case
# agrees with the first weather case in this file.
# NOTE(review): nesting is reconstructed from key order because the original
# indentation was lost -- confirm `tools` sits under `input` and `required`
# sits under `parameters`.
- id: "[WEATHER AGENT] - multi turn, single tool, clarification"
  input:
    messages:
    - role: "user"
      content: "how is the weather for next 5 days?"
    - role: "assistant"
      content: "Can you tell me your location and how many days you want?"
    - role: "user"
      content: "Seattle"
    - role: "assistant"
      content: "Can you please provide me the days for the weather forecast?"
    - role: "user"
      content: "Sorry, the location is actually los angeles in 5 days"
    tools:
    - type: "function"
      function:
        name: "get_current_weather"
        description: "Get current weather at a location."
        parameters:
          type: "object"
          properties:
            location:
              type: "string"
              description: "The location to get the weather for"
              format: "City, State"
            days:
              type: "integer"
              description: "the number of days for the request."
          required: ["location", "days"]
  expected:
  - type: "metadata"
    hallucination: false
# Sales agent, single turn: the user message names an industry ("tech") but no
# region, while the tool requires both; expected metadata has
# hallucination: true.
# Parameter types use the JSON Schema names ("string"/"integer") so this case
# agrees with the first weather case in this file.
# NOTE(review): nesting is reconstructed from key order because the original
# indentation was lost -- confirm `tools` sits under `input` and `required`
# sits under `parameters`.
- id: "[SALE AGENT] - single turn, single tool, hallucination region"
  input:
    messages:
    - role: "user"
      content: "get me sales opportunities of tech"
    tools:
    - type: "function"
      function:
        name: "sales_opportunity"
        description: "Retrieve potential sales opportunities based for a particular industry type in a region."
        parameters:
          type: "object"
          properties:
            region:
              type: "string"
              description: "Geographical region to identify sales opportunities."
            industry:
              type: "string"
              description: "Industry type."
            max_results:
              type: "integer"
              description: "Maximum number of sales opportunities to retrieve."
              default: 20
          # max_results is optional; it carries a default above.
          required: ["region", "industry"]
  expected:
  - type: "metadata"
    hallucination: true
# Sales agent, single turn: the user message names a region ("NA") but no
# industry, while the tool requires both; expected metadata has
# hallucination: true.
# Parameter types use the JSON Schema names ("string"/"integer") so this case
# agrees with the first weather case in this file.
# NOTE(review): nesting is reconstructed from key order because the original
# indentation was lost -- confirm `tools` sits under `input` and `required`
# sits under `parameters`.
- id: "[SALE AGENT] - single turn, single tool, hallucination industry"
  input:
    messages:
    - role: "user"
      content: "get me sales opportunities in NA"
    tools:
    - type: "function"
      function:
        name: "sales_opportunity"
        description: "Retrieve potential sales opportunities based for a particular industry type in a region."
        parameters:
          type: "object"
          properties:
            region:
              type: "string"
              description: "Geographical region to identify sales opportunities."
            industry:
              type: "string"
              description: "Industry type."
            max_results:
              type: "integer"
              description: "Maximum number of sales opportunities to retrieve."
              default: 20
          # max_results is optional; it carries a default above.
          required: ["region", "industry"]
  expected:
  - type: "metadata"
    hallucination: true
# Product agent, single turn: one user message asking for sales opportunities
# in NA, offered four tools (product_recommendation, place_order,
# sales_opportunity, query_database); expected metadata has
# hallucination: true.
# Parameter types use the JSON Schema names ("string"/"integer") so this case
# agrees with the first weather case in this file.
# NOTE(review): the id says "single tool" but four tools are listed -- confirm
# whether the id or the tool list is the intended one.
# NOTE(review): nesting is reconstructed from key order because the original
# indentation was lost -- confirm `tools` sits under `input` and each
# `required` sits under its `parameters`.
- id: "[PRODUCT AGENT] - single turn, single tool, hallucination industry"
  input:
    messages:
    - role: "user"
      content: "get me sales opportunities in NA"
    tools:
    - type: "function"
      function:
        name: "product_recommendation"
        # NOTE(review): this description reads like a user request rather than
        # a description of the tool -- left unchanged, confirm it is intended.
        description: "Place an order for an iphone with user_id 195 and location is 1600 pensylvania ave"
        parameters:
          type: "object"
          properties:
            user_id:
              type: "string"
              description: "Unique identifier for the user."
            category:
              type: "string"
              description: "Product category for recommendations."
            max_results:
              type: "integer"
              description: "Maximum number of recommended products to show."
              default: 10
          required: ["user_id", "category"]
    - type: "function"
      function:
        name: "place_order"
        description: "Place and pay for an order for one or more products to ship to the an address."
        parameters:
          type: "object"
          properties:
            user_id:
              type: "string"
              description: "Unique identifier for the user placing the order."
            product_ids:
              type: "array"
              description: "List of product IDs to include in the order."
            shipping_address:
              type: "string"
              description: "Shipping address for the order."
            payment_method:
              type: "string"
              description: "Payment method for the order."
          required: ["user_id", "product_ids", "shipping_address", "payment_method"]
    - type: "function"
      function:
        name: "sales_opportunity"
        description: "Retrieve potential sales opportunities based for a particular industry type in a region."
        parameters:
          type: "object"
          properties:
            region:
              type: "string"
              description: "Geographical region to identify sales opportunities."
            industry:
              type: "string"
              description: "Industry type."
            max_results:
              type: "integer"
              description: "Maximum number of sales opportunities to retrieve."
              default: 20
          required: ["region", "industry"]
    - type: "function"
      function:
        name: "query_database"
        description: "Perform a database query to retrieve or update information."
        parameters:
          type: "object"
          properties:
            query:
              type: "string"
              description: "SQL query string to execute against the database."
            # This `parameters` is a tool argument named "parameters", distinct
            # from the schema-level `parameters` key above.
            parameters:
              type: "array"
              description: "List of parameters to safely inject into the SQL query (to prevent SQL injection)."
            operation:
              type: "string"
              description: "Type of operation."
          required: ["query", "operation"]
  expected:
  - type: "metadata"
    hallucination: true