# Hallucination test fixtures.
#
# Each entry under `test_cases` carries:
#   id       - label for the case
#   input    - chat `messages` plus the `tools` (function schemas) on offer
#   expected - list of expectations; every case here has a single
#              `metadata` entry with a boolean `hallucination` flag
#
# NOTE(review): the layout below assumes `input` and `expected` are siblings
# under each case and that `required` sits beside `properties` inside
# `parameters` - confirm against the loader that consumes this file.
test_cases:
  # The user names a city but gives no number of days.
  # NOTE(review): this is the only case whose schema uses "string"/"integer";
  # every other case uses "str"/"int" - confirm the difference is intended.
  - id: "[WEATHER AGENT] - single turn, single tool, prompt prefilling"
    input:
      messages:
        - role: "user"
          content: "what is the weather forecast for seattle?"
      tools:
        - type: "function"
          function:
            name: "get_current_weather"
            description: "Get current weather at a location."
            parameters:
              type: "object"
              properties:
                location:
                  type: "string"
                  description: "The location to get the weather for"
                  format: "City, State"
                days:
                  type: "integer"
                  description: "The number of days for the request."
              required:
                - "location"
                - "days"
    expected:
      - type: "metadata"
        hallucination: false

  # The user says "in days" without stating how many.
  - id: "[WEATHER AGENT] - single turn, single tool, hallucination"
    input:
      messages:
        - role: "user"
          content: "what is the weather in Seattle in days?"
      tools:
        - type: "function"
          function:
            name: "get_current_weather"
            description: "Get current weather at a location."
            parameters:
              type: "object"
              properties:
                location:
                  type: "str"
                  description: "The location to get the weather for"
                  format: "City, State"
                days:
                  type: "int"
                  description: "the number of days for the request."
              required:
                - "location"
                - "days"
    expected:
      - type: "metadata"
        hallucination: true

  # Five-message conversation; the user's follow-ups supply "Seattle" and
  # "5 days" (the opening message mentions chicago).
  - id: "[WEATHER AGENT] - multi turn, single tool, all params passed"
    input:
      messages:
        - role: "user"
          content: "how is the weather in chicago for next 5 days?"
        - role: "assistant"
          content: "Can you tell me your location and how many days you want?"
        - role: "user"
          content: "Seattle"
        - role: "assistant"
          content: "Can you please provide me the days for the weather forecast?"
        - role: "user"
          content: "5 days"
      tools:
        - type: "function"
          function:
            name: "get_current_weather"
            description: "Get current weather at a location."
            parameters:
              type: "object"
              properties:
                location:
                  type: "str"
                  description: "The location to get the weather for"
                  format: "City, State"
                days:
                  type: "int"
                  description: "the number of days for the request."
              required:
                - "location"
                - "days"
    expected:
      - type: "metadata"
        hallucination: false

  # Five-message conversation; the user first answers "Seattle", then
  # corrects the location to los angeles in the final message.
  - id: "[WEATHER AGENT] - multi turn, single tool, clarification"
    input:
      messages:
        - role: "user"
          content: "how is the weather for next 5 days?"
        - role: "assistant"
          content: "Can you tell me your location and how many days you want?"
        - role: "user"
          content: "Seattle"
        - role: "assistant"
          content: "Can you please provide me the days for the weather forecast?"
        - role: "user"
          content: "Sorry, the location is actually los angeles in 5 days"
      tools:
        - type: "function"
          function:
            name: "get_current_weather"
            description: "Get current weather at a location."
            parameters:
              type: "object"
              properties:
                location:
                  type: "str"
                  description: "The location to get the weather for"
                  format: "City, State"
                days:
                  type: "int"
                  description: "the number of days for the request."
              required:
                - "location"
                - "days"
    expected:
      - type: "metadata"
        hallucination: false

  # The user mentions "tech" but no region; `region` is required.
  - id: "[SALE AGENT] - single turn, single tool, hallucination region"
    input:
      messages:
        - role: "user"
          content: "get me sales opportunities of tech"
      tools:
        - type: "function"
          function:
            name: "sales_opportunity"
            description: "Retrieve potential sales opportunities based for a particular industry type in a region."
            parameters:
              type: "object"
              properties:
                region:
                  type: "str"
                  description: "Geographical region to identify sales opportunities."
                industry:
                  type: "str"
                  description: "Industry type."
                max_results:
                  type: "int"
                  description: "Maximum number of sales opportunities to retrieve."
                  default: 20
              required:
                - "region"
                - "industry"
    expected:
      - type: "metadata"
        hallucination: true

  # The user mentions "NA" but no industry; `industry` is required.
  - id: "[SALE AGENT] - single turn, single tool, hallucination industry"
    input:
      messages:
        - role: "user"
          content: "get me sales opportunities in NA"
      tools:
        - type: "function"
          function:
            name: "sales_opportunity"
            description: "Retrieve potential sales opportunities based for a particular industry type in a region."
            parameters:
              type: "object"
              properties:
                region:
                  type: "str"
                  description: "Geographical region to identify sales opportunities."
                industry:
                  type: "str"
                  description: "Industry type."
                max_results:
                  type: "int"
                  description: "Maximum number of sales opportunities to retrieve."
                  default: 20
              required:
                - "region"
                - "industry"
    expected:
      - type: "metadata"
        hallucination: true

  # Same user message as the previous case, but with four tools on offer.
  # NOTE(review): the id says "single tool" although four tools are listed -
  # confirm whether the id or the tool list is the intended one.
  - id: "[PRODUCT AGENT] - single turn, single tool, hallucination industry"
    input:
      messages:
        - role: "user"
          content: "get me sales opportunities in NA"
      tools:
        - type: "function"
          function:
            name: "product_recommendation"
            # NOTE(review): this description reads like a user request rather
            # than a description of the tool - confirm it is intentional.
            description: "Place an order for an iphone with user_id 195 and location is 1600 pensylvania ave"
            parameters:
              type: "object"
              properties:
                user_id:
                  type: "str"
                  description: "Unique identifier for the user."
                category:
                  type: "str"
                  description: "Product category for recommendations."
                max_results:
                  type: "int"
                  description: "Maximum number of recommended products to show."
                  default: 10
              required:
                - "user_id"
                - "category"
        - type: "function"
          function:
            name: "place_order"
            description: "Place and pay for an order for one or more products to ship to the an address."
            parameters:
              type: "object"
              properties:
                user_id:
                  type: "str"
                  description: "Unique identifier for the user placing the order."
                product_ids:
                  type: "array"
                  description: "List of product IDs to include in the order."
                shipping_address:
                  type: "str"
                  description: "Shipping address for the order."
                payment_method:
                  type: "str"
                  description: "Payment method for the order."
              required:
                - "user_id"
                - "product_ids"
                - "shipping_address"
                - "payment_method"
        - type: "function"
          function:
            name: "sales_opportunity"
            description: "Retrieve potential sales opportunities based for a particular industry type in a region."
            parameters:
              type: "object"
              properties:
                region:
                  type: "str"
                  description: "Geographical region to identify sales opportunities."
                industry:
                  type: "str"
                  description: "Industry type."
                max_results:
                  type: "int"
                  description: "Maximum number of sales opportunities to retrieve."
                  default: 20
              required:
                - "region"
                - "industry"
        - type: "function"
          function:
            name: "query_database"
            description: "Perform a database query to retrieve or update information."
            parameters:
              type: "object"
              properties:
                query:
                  type: "str"
                  description: "SQL query string to execute against the database."
                parameters:
                  type: "array"
                  description: "List of parameters to safely inject into the SQL query (to prevent SQL injection)."
                operation:
                  type: "str"
                  description: "Type of operation."
              required:
                - "query"
                - "operation"
    expected:
      - type: "metadata"
        hallucination: true