From 859566ee5933555f1775e2c8503f6b7305023a7c Mon Sep 17 00:00:00 2001 From: Entropix Date: Wed, 31 Dec 2025 23:04:47 +0800 Subject: [PATCH] Implement flexible HTTP agent adapter with request templates and connection guides - Add request_template, response_path, method, query_params, and parse_structured_input to AgentConfig - Implement structured input parser for key-value extraction from golden prompts - Implement template engine with variable substitution for {prompt} and {field_name} - Implement response extractor supporting JSONPath and dot notation - Update HTTPAgentAdapter to support all HTTP methods (GET, POST, PUT, PATCH, DELETE) - Add comprehensive connection guide explaining localhost vs public endpoints - Update documentation with examples for TypeScript/JavaScript developers - Add tests for all new features --- .gitignore | 1 + README.md | 1 + docs/CONFIGURATION_GUIDE.md | 131 +++++- docs/CONNECTION_GUIDE.md | 317 ++++++++++++++ docs/DEVELOPER_FAQ.md | 103 +++++ docs/USAGE_GUIDE.md | 710 +++++++++++++++++++++++++++++++- flakestorm.yaml.example | 63 ++- src/flakestorm/core/config.py | 36 ++ src/flakestorm/core/protocol.py | 223 +++++++++- tests/test_adapters.py | 285 +++++++++++++ 10 files changed, 1839 insertions(+), 31 deletions(-) create mode 100644 docs/CONNECTION_GUIDE.md diff --git a/.gitignore b/.gitignore index cdbaacc..a51a674 100644 --- a/.gitignore +++ b/.gitignore @@ -110,6 +110,7 @@ docs/* # Now un-ignore the specific files we want to track !docs/USAGE_GUIDE.md !docs/CONFIGURATION_GUIDE.md +!docs/CONNECTION_GUIDE.md !docs/TEST_SCENARIOS.md !docs/MODULES.md !docs/DEVELOPER_FAQ.md diff --git a/README.md b/README.md index 1597df4..964e62c 100644 --- a/README.md +++ b/README.md @@ -333,6 +333,7 @@ Where: ### Getting Started - [📖 Usage Guide](docs/USAGE_GUIDE.md) - Complete end-to-end guide - [⚙️ Configuration Guide](docs/CONFIGURATION_GUIDE.md) - All configuration options +- [🔌 Connection Guide](docs/CONNECTION_GUIDE.md) - How to 
connect FlakeStorm to your agent - [🧪 Test Scenarios](docs/TEST_SCENARIOS.md) - Real-world examples with code ### For Developers diff --git a/docs/CONFIGURATION_GUIDE.md b/docs/CONFIGURATION_GUIDE.md index ae73b7e..b36b634 100644 --- a/docs/CONFIGURATION_GUIDE.md +++ b/docs/CONFIGURATION_GUIDE.md @@ -47,6 +47,10 @@ Define how flakestorm connects to your AI agent. ### HTTP Agent +FlakeStorm's HTTP adapter is highly flexible and supports any endpoint format through request templates and response path configuration. + +#### Basic Configuration + ```yaml agent: endpoint: "http://localhost:8000/invoke" @@ -57,7 +61,7 @@ agent: Content-Type: "application/json" ``` -**Expected API Format:** +**Default Format (if no template specified):** Request: ```json @@ -70,6 +74,126 @@ Response: {"output": "agent response text"} ``` +#### Custom Request Template + +Map your endpoint's exact format using `request_template`: + +```yaml +agent: + endpoint: "http://localhost:8000/api/chat" + type: "http" + method: "POST" + request_template: | + {"message": "{prompt}", "stream": false} + response_path: "$.reply" +``` + +**Template Variables:** +- `{prompt}` - Full golden prompt text +- `{field_name}` - Parsed structured input fields (see Structured Input below) + +#### Structured Input Parsing + +For agents that accept structured input (like your Reddit query generator): + +```yaml +agent: + endpoint: "http://localhost:8000/generate-query" + type: "http" + method: "POST" + request_template: | + { + "industry": "{industry}", + "productName": "{productName}", + "businessModel": "{businessModel}", + "targetMarket": "{targetMarket}", + "description": "{description}" + } + response_path: "$.query" + parse_structured_input: true # Default: true +``` + +**Golden Prompt Format:** +```yaml +golden_prompts: + - | + Industry: Fitness tech + Product/Service: AI personal trainer app + Business Model: B2C + Target Market: fitness enthusiasts + Description: An app that provides personalized workout 
plans +``` + +FlakeStorm will automatically parse this and map fields to your template. + +#### HTTP Methods + +Support for all HTTP methods: + +**GET Request:** +```yaml +agent: + endpoint: "http://api.example.com/search" + type: "http" + method: "GET" + request_template: "q={prompt}" + query_params: + api_key: "${API_KEY}" + format: "json" +``` + +**PUT Request:** +```yaml +agent: + endpoint: "http://api.example.com/update" + type: "http" + method: "PUT" + request_template: | + {"id": "123", "content": "{prompt}"} +``` + +#### Response Path Extraction + +Extract responses from complex JSON structures: + +```yaml +agent: + endpoint: "http://api.example.com/chat" + type: "http" + response_path: "$.choices[0].message.content" # JSONPath + # OR + response_path: "data.result" # Dot notation +``` + +**Supported Formats:** +- JSONPath: `"$.data.result"`, `"$.choices[0].message.content"` +- Dot notation: `"data.result"`, `"response.text"` +- Simple key: `"output"`, `"response"` + +#### Complete Example + +```yaml +agent: + endpoint: "http://localhost:8000/api/v1/agent" + type: "http" + method: "POST" + timeout: 30000 + headers: + Authorization: "Bearer ${API_KEY}" + Content-Type: "application/json" + request_template: | + { + "messages": [ + {"role": "user", "content": "{prompt}"} + ], + "temperature": 0.7 + } + response_path: "$.choices[0].message.content" + query_params: + version: "v1" + parse_structured_input: true +``` + ### Python Agent ```yaml @@ -109,6 +233,11 @@ chain: Runnable = ... 
# Your LangChain chain |--------|------|---------|-------------| | `endpoint` | string | required | URL or module path | | `type` | string | `"http"` | `http`, `python`, or `langchain` | +| `method` | string | `"POST"` | HTTP method: `GET`, `POST`, `PUT`, `PATCH`, `DELETE` | +| `request_template` | string | `null` | Template for request body/query with `{prompt}` or `{field_name}` variables | +| `response_path` | string | `null` | JSONPath or dot notation to extract response (e.g., `"$.data.result"`) | +| `query_params` | object | `{}` | Static query parameters (supports env vars) | +| `parse_structured_input` | boolean | `true` | Whether to parse structured golden prompts into key-value pairs | | `timeout` | integer | `30000` | Request timeout in ms (1000-300000) | | `headers` | object | `{}` | HTTP headers (supports env vars) | diff --git a/docs/CONNECTION_GUIDE.md b/docs/CONNECTION_GUIDE.md new file mode 100644 index 0000000..1fc7cfa --- /dev/null +++ b/docs/CONNECTION_GUIDE.md @@ -0,0 +1,317 @@ +# FlakeStorm Connection Guide + +This guide explains how to connect FlakeStorm to your agent, covering different scenarios from localhost to public endpoints, and options for internal code. + +--- + +## Table of Contents + +1. [Connection Requirements](#connection-requirements) +2. [Localhost vs Public Endpoints](#localhost-vs-public-endpoints) +3. [Internal Code Options](#internal-code-options) +4. [Exposing Local Endpoints](#exposing-local-endpoints) +5. [Troubleshooting](#troubleshooting) + +--- + +## Connection Requirements + +### When Do You Need an HTTP Endpoint? + +| Your Agent Code | Adapter Type | Endpoint Needed? 
| Notes | +|----------------|--------------|------------------|-------| +| Python (internal) | Python adapter | ❌ No | Use `type: "python"`, call function directly | +| TypeScript/JavaScript | HTTP adapter | ✅ Yes | Must create HTTP endpoint (can be localhost) | +| Java/Go/Rust | HTTP adapter | ✅ Yes | Must create HTTP endpoint (can be localhost) | +| Already has HTTP API | HTTP adapter | ✅ Yes | Use existing endpoint | + +**Key Point:** FlakeStorm is a Python CLI tool. It can only directly call Python functions. For non-Python code, you **must** create an HTTP endpoint wrapper. + +--- + +## Localhost vs Public Endpoints + +### When Localhost Works + +| FlakeStorm Location | Agent Location | Endpoint Type | Works? | +|---------------------|----------------|---------------|--------| +| Same machine | Same machine | `localhost:8000` | ✅ Yes | +| Different machine | Your machine | `localhost:8000` | ❌ No | +| CI/CD server | Your machine | `localhost:8000` | ❌ No | +| CI/CD server | Cloud (AWS/GCP) | `https://api.example.com` | ✅ Yes | + +**Rule of Thumb:** If FlakeStorm and your agent run on the **same machine**, use `localhost`. Otherwise, you need a **public endpoint**. + +--- + +## Internal Code Options + +### Option 1: Python Adapter (Recommended for Python Code) + +If your agent code is in Python, use the Python adapter - **no HTTP endpoint needed**: + +```python +# my_agent.py +async def flakestorm_agent(input: str) -> str: + """ + FlakeStorm will call this function directly. + + Args: + input: The golden prompt text (may be structured) + + Returns: + The agent's response as a string + """ + # Parse input, call your internal functions + params = parse_structured_input(input) + result = await your_internal_function(params) + return result +``` + +```yaml +# flakestorm.yaml +agent: + endpoint: "my_agent:flakestorm_agent" + type: "python" # ← No HTTP endpoint needed! 
+``` + +**Benefits:** +- No server setup required +- Faster (no HTTP overhead) +- Works offline +- No network configuration + +### Option 2: HTTP Wrapper Endpoint (Required for Non-Python Code) + +For TypeScript/JavaScript/Java/Go/Rust, create a simple HTTP wrapper: + +**TypeScript/Node.js Example:** +```typescript +// test-endpoint.ts +import express from 'express'; +import { generateRedditSearchQuery } from './your-internal-code'; + +const app = express(); +app.use(express.json()); + +app.post('/flakestorm-test', async (req, res) => { + // FlakeStorm sends: {"input": "Industry: X\nProduct: Y..."} + const structuredText = req.body.input; + + // Parse structured input + const params = parseStructuredInput(structuredText); + + // Call your internal function + const query = await generateRedditSearchQuery(params); + + // Return in FlakeStorm's expected format + res.json({ output: query }); +}); + +app.listen(8000, () => { + console.log('FlakeStorm test endpoint: http://localhost:8000/flakestorm-test'); +}); +``` + +**Python FastAPI Example:** +```python +# test_endpoint.py +from fastapi import FastAPI +from pydantic import BaseModel + +app = FastAPI() + +class Request(BaseModel): + input: str + +@app.post("/flakestorm-test") +async def flakestorm_test(request: Request): + # Parse structured input + params = parse_structured_input(request.input) + + # Call your internal function + result = await your_internal_function(params) + + return {"output": result} +``` + +Then in `flakestorm.yaml`: +```yaml +agent: + endpoint: "http://localhost:8000/flakestorm-test" + type: "http" + request_template: | + { + "industry": "{industry}", + "productName": "{productName}", + "businessModel": "{businessModel}", + "targetMarket": "{targetMarket}", + "description": "{description}" + } + response_path: "$.output" +``` + +--- + +## Exposing Local Endpoints + +If FlakeStorm runs on a different machine (e.g., CI/CD), you need to expose your local endpoint publicly. 
+ +### Option 1: ngrok (Recommended) + +```bash +# Install ngrok +brew install ngrok # macOS +# Or download from https://ngrok.com/download + +# Expose local port 8000 +ngrok http 8000 + +# Output: +# Forwarding https://abc123.ngrok.io -> http://localhost:8000 +``` + +Then use the ngrok URL in your config: +```yaml +agent: + endpoint: "https://abc123.ngrok.io/flakestorm-test" + type: "http" +``` + +### Option 2: localtunnel + +```bash +# Install +npm install -g localtunnel + +# Expose port +lt --port 8000 + +# Output: +# your url is: https://xyz.localtunnel.me +``` + +### Option 3: Deploy to Cloud + +Deploy your test endpoint to a cloud service: +- **Vercel** (for Node.js/TypeScript) +- **Railway** (any language) +- **Fly.io** (any language) +- **AWS Lambda** (serverless) + +### Option 4: VPN/SSH Tunnel + +If both machines are on the same network: +```bash +# SSH tunnel +ssh -L 8000:localhost:8000 user@agent-machine + +# Then use localhost:8000 in config +``` + +--- + +## Troubleshooting + +### "Connection Refused" Error + +**Problem:** FlakeStorm can't reach your endpoint. + +**Solutions:** +1. **Check if agent is running:** + ```bash + curl http://localhost:8000/health + ``` + +2. **Verify endpoint URL in config:** + ```yaml + agent: + endpoint: "http://localhost:8000/invoke" # Check this matches your server + ``` + +3. **Check firewall:** + ```bash + # macOS: System Preferences > Security & Privacy > Firewall + # Linux: sudo ufw allow 8000 + ``` + +4. **For Docker/containers:** + - Use `host.docker.internal:8000` instead of `localhost:8000` + - Or use container networking + +### "Timeout" Error + +**Problem:** Agent takes too long to respond. + +**Solutions:** +1. **Increase timeout:** + ```yaml + agent: + timeout: 60000 # 60 seconds + ``` + +2. **Check agent performance:** + - Is the agent actually processing requests? + - Are there network issues? + +### "Invalid Response Format" Error + +**Problem:** Response doesn't match expected format. 
+ +**Solutions:** +1. **Use response_path:** + ```yaml + agent: + response_path: "$.data.result" # Extract from nested JSON + ``` + +2. **Check actual response:** + ```bash + curl -X POST http://localhost:8000/invoke \ + -H "Content-Type: application/json" \ + -d '{"input": "test"}' + ``` + +3. **Update request_template if needed:** + ```yaml + agent: + request_template: | + {"your_field": "{prompt}"} + ``` + +### Network Connectivity Issues + +**Problem:** Can't connect from CI/CD or remote machine. + +**Solutions:** +1. **Use public endpoint** (ngrok, cloud deployment) +2. **Check network policies** (corporate firewall, VPN) +3. **Verify DNS resolution** (if using domain name) +4. **Test with curl** from the same machine FlakeStorm runs on + +--- + +## Best Practices + +1. **For Development:** Use Python adapter if possible (fastest, simplest) +2. **For Testing:** Use localhost HTTP endpoint (easy to debug) +3. **For CI/CD:** Use public endpoint or cloud deployment +4. **For Production Testing:** Use production endpoint with proper authentication +5. **Security:** Never commit API keys - use environment variables + +--- + +## Quick Reference + +| Scenario | Solution | +|----------|----------| +| Python code, same machine | Python adapter (`type: "python"`) | +| TypeScript/JS, same machine | HTTP endpoint (`localhost:8000`) | +| Any language, CI/CD | Public endpoint (ngrok/cloud) | +| Already has HTTP API | Use existing endpoint | +| Need custom request format | Use `request_template` | +| Complex response structure | Use `response_path` | + +--- + +*For more examples, see [Configuration Guide](CONFIGURATION_GUIDE.md) and [Usage Guide](USAGE_GUIDE.md).* diff --git a/docs/DEVELOPER_FAQ.md b/docs/DEVELOPER_FAQ.md index e24708b..0b2d9ff 100644 --- a/docs/DEVELOPER_FAQ.md +++ b/docs/DEVELOPER_FAQ.md @@ -456,6 +456,109 @@ class PythonAgentAdapter: --- +### Q: When do I need to create an HTTP endpoint vs use Python adapter? 
+ +**A:** It depends on your agent's language and setup: + +| Your Agent Code | Adapter Type | Endpoint Needed? | Notes | +|----------------|--------------|------------------|-------| +| Python (internal) | Python adapter | ❌ No | Use `type: "python"`, call function directly | +| TypeScript/JavaScript | HTTP adapter | ✅ Yes | Must create HTTP endpoint (can be localhost) | +| Java/Go/Rust | HTTP adapter | ✅ Yes | Must create HTTP endpoint (can be localhost) | +| Already has HTTP API | HTTP adapter | ✅ Yes | Use existing endpoint | + +**For non-Python code (TypeScript example):** + +Since FlakeStorm is a Python CLI tool, it can only directly call Python functions. For TypeScript/JavaScript/other languages, you **must** create an HTTP endpoint: + +```typescript +// test-endpoint.ts - Wrapper endpoint for FlakeStorm +import express from 'express'; +import { generateRedditSearchQuery } from './your-internal-code'; + +const app = express(); +app.use(express.json()); + +app.post('/flakestorm-test', async (req, res) => { + // FlakeStorm sends: {"input": "Industry: X\nProduct: Y..."} + const structuredText = req.body.input; + + // Parse structured input + const params = parseStructuredInput(structuredText); + + // Call your internal function + const query = await generateRedditSearchQuery(params); + + // Return in FlakeStorm's expected format + res.json({ output: query }); +}); + +app.listen(8000, () => { + console.log('FlakeStorm test endpoint: http://localhost:8000/flakestorm-test'); +}); +``` + +Then in `flakestorm.yaml`: +```yaml +agent: + endpoint: "http://localhost:8000/flakestorm-test" + type: "http" + request_template: | + { + "industry": "{industry}", + "productName": "{productName}", + "businessModel": "{businessModel}", + "targetMarket": "{targetMarket}", + "description": "{description}" + } + response_path: "$.output" +``` + +--- + +### Q: Do I need a public endpoint or can I use localhost? 
+ +**A:** It depends on where FlakeStorm runs: + +| FlakeStorm Location | Agent Location | Endpoint Type | Works? | +|---------------------|----------------|---------------|--------| +| Same machine | Same machine | `localhost:8000` | ✅ Yes | +| Different machine | Your machine | `localhost:8000` | ❌ No - use public endpoint or ngrok | +| CI/CD server | Your machine | `localhost:8000` | ❌ No - use public endpoint | +| CI/CD server | Cloud (AWS/GCP) | `https://api.example.com` | ✅ Yes | + +**Options for exposing local endpoint:** +1. **ngrok**: `ngrok http 8000` → get public URL +2. **localtunnel**: `lt --port 8000` → get public URL +3. **Deploy to cloud**: Deploy your test endpoint to a cloud service +4. **VPN/SSH tunnel**: If both machines are on same network + +--- + +### Q: Can I test internal code without creating an endpoint? + +**A:** Only if your code is in Python: + +```python +# my_agent.py +async def flakestorm_agent(input: str) -> str: + # Parse input, call your internal functions + return result +``` + +```yaml +# flakestorm.yaml +agent: + endpoint: "my_agent:flakestorm_agent" + type: "python" # ← No HTTP endpoint needed! +``` + +For non-Python code, you **must** create an HTTP endpoint wrapper. + +See [Connection Guide](CONNECTION_GUIDE.md) for detailed examples and troubleshooting. + +--- + ## Testing & Quality ### Q: Why are tests split by module? diff --git a/docs/USAGE_GUIDE.md b/docs/USAGE_GUIDE.md index 62ebff4..0ad5e7c 100644 --- a/docs/USAGE_GUIDE.md +++ b/docs/USAGE_GUIDE.md @@ -455,23 +455,280 @@ open reports/flakestorm-*.html **What they are:** Carefully crafted prompts that represent your agent's core use cases. These are prompts that *should always work correctly*. 
-**How to choose them:** -- Cover all major user intents -- Include edge cases you've seen in production -- Represent different complexity levels +#### Understanding Golden Prompts vs System Prompts +**Key Distinction:** +- **System Prompt**: Instructions that define your agent's role and behavior (stays in your code) +- **Golden Prompt**: Example user inputs that should work correctly (what FlakeStorm mutates and tests) + +**Example:** +```javascript +// System Prompt (in your agent code - NOT in flakestorm.yaml) +const systemPrompt = `You are a helpful assistant that books flights...`; + +// Golden Prompts (in flakestorm.yaml - what FlakeStorm tests) +golden_prompts: + - "Book a flight from NYC to LA" + - "I need to fly to Paris next Monday" +``` + +FlakeStorm takes your golden prompts, mutates them (adds typos, paraphrases, etc.), and sends them to your agent. Your agent processes them using its system prompt. + +#### How to Choose Golden Prompts + +**1. Cover All Major User Intents** +```yaml +golden_prompts: + # Primary use case + - "Book a flight from New York to Los Angeles" + + # Secondary use case + - "What's my account balance?" + + # Another feature + - "Cancel my reservation #12345" +``` + +**2. Include Different Complexity Levels** ```yaml golden_prompts: # Simple intent - "Hello, how are you?" - # Complex intent with parameters - - "Book a flight from New York to Los Angeles departing March 15th" + # Medium complexity + - "Book a flight to Paris" - # Edge case - - "What if I need to cancel my booking?" + # Complex with multiple parameters + - "Book a flight from New York to Los Angeles departing March 15th, returning March 22nd, economy class, window seat" ``` +**3. Include Edge Cases** +```yaml +golden_prompts: + # Normal case + - "Book a flight to Paris" + + # Edge case: unusual request + - "What if I need to cancel my booking?" 
+ + # Edge case: minimal input + - "Paris" + + # Edge case: ambiguous request + - "I need to travel somewhere warm" +``` + +#### Examples by Agent Type + +**1. Simple Chat Agent** +```yaml +golden_prompts: + - "What is the weather in New York?" + - "Tell me a joke" + - "How do I make a paper airplane?" + - "What's 2 + 2?" +``` + +**2. E-commerce Assistant** +```yaml +golden_prompts: + - "I'm looking for a red dress size medium" + - "Show me running shoes under $100" + - "What's the return policy?" + - "Add this to my cart" + - "Track my order #ABC123" +``` + +**3. Structured Input Agent (Reddit Search Query Generator)** + +For agents that accept structured input (like a Reddit community discovery assistant): + +```yaml +golden_prompts: + # B2C SaaS example + - | + Industry: Fitness tech + Product/Service: AI personal trainer app + Business Model: B2C + Target Market: fitness enthusiasts, people who want to lose weight + Description: An app that provides personalized workout plans using AI + + # B2B SaaS example + - | + Industry: Marketing tech + Product/Service: Email automation platform + Business Model: B2B SaaS + Target Market: small business owners, marketing teams + Description: Automated email campaigns for small businesses + + # Marketplace example + - | + Industry: E-commerce + Product/Service: Handmade crafts marketplace + Business Model: Marketplace + Target Market: crafters, DIY enthusiasts, gift buyers + Description: Platform connecting artisans with buyers + + # Edge case - minimal description + - | + Industry: Healthcare tech + Product/Service: Telemedicine platform + Business Model: B2C + Target Market: busy professionals + Description: Video consultations +``` + +**4. API/Function-Calling Agent** +```yaml +golden_prompts: + - "Get the weather for San Francisco" + - "Send an email to john@example.com with subject 'Meeting'" + - "Create a calendar event for tomorrow at 3pm" + - "What's my schedule for next week?" +``` + +**5. 
Code Generation Agent** +```yaml +golden_prompts: + - "Write a Python function to sort a list" + - "Create a React component for a login form" + - "How do I connect to a PostgreSQL database in Node.js?" + - "Fix this bug: [code snippet]" +``` + +#### Best Practices + +**1. Start Small, Then Expand** +```yaml +# Phase 1: Start with 2-3 core prompts +golden_prompts: + - "Primary use case 1" + - "Primary use case 2" + +# Phase 2: Add more as you validate +golden_prompts: + - "Primary use case 1" + - "Primary use case 2" + - "Secondary use case" + - "Edge case 1" + - "Edge case 2" +``` + +**2. Cover Different User Personas** +```yaml +golden_prompts: + # Professional user + - "I need to schedule a meeting with the team for Q4 planning" + + # Casual user + - "hey can u help me book something" + + # Technical user + - "Query the database for all users created after 2024-01-01" + + # Non-technical user + - "Show me my account" +``` + +**3. Include Real Production Examples** +```yaml +golden_prompts: + # From your production logs + - "Actual user query from logs" + - "Another real example" + - "Edge case that caused issues before" +``` + +**4. Test Different Input Formats** +```yaml +golden_prompts: + # Well-formatted + - "Book a flight from New York to Los Angeles on March 15th" + + # Informal + - "need a flight nyc to la march 15" + + # With extra context + - "Hi! I'm planning a trip and I need to book a flight from New York City to Los Angeles on March 15th, 2024. Can you help?" +``` + +**5. 
For Structured Input: Cover All Variations** +```yaml +golden_prompts: + # Complete input + - | + Industry: Tech + Product: SaaS platform + Model: B2B + Market: Enterprises + Description: Full description here + + # Minimal input (edge case) + - | + Industry: Tech + Product: Platform + + # Different business models + - | + Industry: Retail + Product: E-commerce site + Model: B2C + Market: Consumers +``` + +#### Common Patterns + +**Pattern 1: Question-Answer Agent** +```yaml +golden_prompts: + - "What is X?" + - "How do I Y?" + - "Why does Z happen?" + - "When should I do A?" +``` + +**Pattern 2: Task-Oriented Agent** +```yaml +golden_prompts: + - "Do X" # imperative + - "I need to do X" # declarative + - "Can you help me with X?" # question form + - "X please" # polite request +``` + +**Pattern 3: Multi-Turn Context Agent** +```yaml +golden_prompts: + # First turn + - "I'm looking for a hotel" + # Second turn (test separately) + - "In Paris" + # Third turn (test separately) + - "Under $200 per night" +``` + +**Pattern 4: Data Processing Agent** +```yaml +golden_prompts: + - "Analyze this data: [data]" + - "Summarize the following: [text]" + - "Extract key information from: [content]" +``` + +#### What NOT to Include + +❌ **Don't include:** +- Prompts that are known to fail (those are edge cases to test, not golden prompts) +- System prompts or instructions (those stay in your code) +- Malformed inputs (FlakeStorm will generate those as mutations) +- Test-only prompts that users would never send + +✅ **Do include:** +- Real user queries from production +- Expected use cases +- Prompts that should always work +- Representative examples of your user base + ### Mutation Types flakestorm generates adversarial variations of your golden prompts: @@ -862,6 +1119,143 @@ agent = AgentExecutor(...) 
--- +## Request Templates and Connection Setup + +### Understanding Request Templates + +Request templates allow you to map FlakeStorm's format to your agent's exact API format. + +#### Basic Template + +```yaml +agent: + endpoint: "http://localhost:8000/api/chat" + type: "http" + request_template: | + {"message": "{prompt}", "stream": false} + response_path: "$.reply" +``` + +**What happens:** +1. FlakeStorm takes golden prompt: `"Book a flight to Paris"` +2. Replaces `{prompt}` in template: `{"message": "Book a flight to Paris", "stream": false}` +3. Sends to your endpoint +4. Extracts response from `$.reply` path + +#### Structured Input Mapping + +For agents that accept structured input: + +```yaml +agent: + endpoint: "http://localhost:8000/generate-query" + type: "http" + method: "POST" + request_template: | + { + "industry": "{industry}", + "productName": "{productName}", + "businessModel": "{businessModel}", + "targetMarket": "{targetMarket}", + "description": "{description}" + } + response_path: "$.query" + parse_structured_input: true +``` + +**Golden Prompt:** +```yaml +golden_prompts: + - | + Industry: Fitness tech + Product/Service: AI personal trainer app + Business Model: B2C + Target Market: fitness enthusiasts + Description: An app that provides personalized workout plans +``` + +**What happens:** +1. FlakeStorm parses structured input into key-value pairs +2. Maps fields to template: `{"industry": "Fitness tech", "productName": "AI personal trainer app", ...}` +3. Sends to your endpoint +4. 
Extracts response from `$.query` + +#### Different HTTP Methods + +**GET Request:** +```yaml +agent: + endpoint: "http://api.example.com/search" + type: "http" + method: "GET" + request_template: "q={prompt}" + query_params: + api_key: "${API_KEY}" + format: "json" +``` + +**PUT Request:** +```yaml +agent: + endpoint: "http://api.example.com/update" + type: "http" + method: "PUT" + request_template: | + {"id": "123", "content": "{prompt}"} +``` + +### Connection Setup + +#### For Python Code (No Endpoint Needed) + +```python +# my_agent.py +async def flakestorm_agent(input: str) -> str: + # Your agent logic + return result +``` + +```yaml +agent: + endpoint: "my_agent:flakestorm_agent" + type: "python" +``` + +#### For TypeScript/JavaScript (Need HTTP Endpoint) + +Create a wrapper endpoint: + +```typescript +// test-endpoint.ts +import express from 'express'; +import { yourAgentFunction } from './your-code'; + +const app = express(); +app.use(express.json()); + +app.post('/flakestorm-test', async (req, res) => { + const result = await yourAgentFunction(req.body.input); + res.json({ output: result }); +}); + +app.listen(8000); +``` + +```yaml +agent: + endpoint: "http://localhost:8000/flakestorm-test" + type: "http" +``` + +#### Localhost vs Public Endpoint + +- **Same machine:** Use `localhost:8000` +- **Different machine/CI/CD:** Use public endpoint (ngrok, cloud deployment) + +See [Connection Guide](CONNECTION_GUIDE.md) for detailed setup instructions. + +--- + ## Advanced Usage ### Custom Mutation Templates @@ -921,6 +1315,306 @@ advanced: retries: 3 # Retry failed requests 3 times ``` +### Golden Prompt Guide + +A comprehensive guide to creating effective golden prompts for your agent. 
+ +#### Step-by-Step: Creating Golden Prompts + +**Step 1: Identify Core Use Cases** +```yaml +# List your agent's primary functions +# Example: Flight booking agent +golden_prompts: + - "Book a flight" # Core function + - "Check flight status" # Core function + - "Cancel booking" # Core function +``` + +**Step 2: Add Variations for Each Use Case** +```yaml +golden_prompts: + # Booking variations + - "Book a flight from NYC to LA" + - "I need to fly to Paris" + - "Reserve a ticket to Tokyo" + - "Can you book me a flight?" + + # Status check variations + - "What's my flight status?" + - "Check my booking" + - "Is my flight on time?" +``` + +**Step 3: Include Edge Cases** +```yaml +golden_prompts: + # Normal cases (from Step 2) + - "Book a flight from NYC to LA" + + # Edge cases + - "Book a flight" # Minimal input + - "I need to travel somewhere" # Vague request + - "What if I need to change my flight?" # Conditional + - "Book a flight for next year" # Far future +``` + +**Step 4: Cover Different User Styles** +```yaml +golden_prompts: + # Formal + - "I would like to book a flight from New York to Los Angeles" + + # Casual + - "hey can u book me a flight nyc to la" + + # Technical/precise + - "Flight booking: JFK -> LAX, 2024-03-15, economy" + + # Verbose + - "Hi! I'm planning a trip and I need to book a flight from New York City to Los Angeles on March 15th, 2024. Can you help me with that?" 
+``` + +#### Golden Prompts for Structured Input Agents + +For agents that accept structured data (JSON, YAML, key-value pairs): + +**Example: Reddit Community Discovery Agent** +```yaml +golden_prompts: + # Complete structured input + - | + Industry: Fitness tech + Product/Service: AI personal trainer app + Business Model: B2C + Target Market: fitness enthusiasts, people who want to lose weight + Description: An app that provides personalized workout plans using AI + + # Different business model + - | + Industry: Marketing tech + Product/Service: Email automation platform + Business Model: B2B SaaS + Target Market: small business owners, marketing teams + Description: Automated email campaigns for small businesses + + # Minimal input (edge case) + - | + Industry: Healthcare tech + Product/Service: Telemedicine platform + Business Model: B2C + + # Different industry + - | + Industry: E-commerce + Product/Service: Handmade crafts marketplace + Business Model: Marketplace + Target Market: crafters, DIY enthusiasts + Description: Platform connecting artisans with buyers +``` + +**Example: API Request Builder Agent** +```yaml +golden_prompts: + - | + Method: GET + Endpoint: /users + Headers: {"Authorization": "Bearer token"} + + - | + Method: POST + Endpoint: /orders + Body: {"product_id": 123, "quantity": 2} + + - | + Method: PUT + Endpoint: /users/123 + Body: {"name": "John Doe"} +``` + +#### Domain-Specific Examples + +**E-commerce Agent:** +```yaml +golden_prompts: + # Product search + - "I'm looking for a red dress size medium" + - "Show me running shoes under $100" + - "Find blue jeans for men" + + # Cart operations + - "Add this to my cart" + - "What's in my cart?" + - "Remove item from cart" + + # Orders + - "Track my order #ABC123" + - "What's my order status?" + - "Cancel my order" + + # Support + - "What's the return policy?" + - "How do I exchange an item?" 
+ - "Contact customer service" +``` + +**Code Generation Agent:** +```yaml +golden_prompts: + # Simple functions + - "Write a Python function to sort a list" + - "Create a function to calculate factorial" + + # Components + - "Create a React component for a login form" + - "Build a Vue component for a todo list" + + # Integration + - "How do I connect to PostgreSQL in Node.js?" + - "Show me how to use Redis with Python" + + # Debugging + - "Fix this bug: [code snippet]" + - "Why is this code not working?" +``` + +**Customer Support Agent:** +```yaml +golden_prompts: + # Account questions + - "What's my account balance?" + - "How do I change my password?" + - "Update my email address" + + # Product questions + - "How do I use feature X?" + - "What are the system requirements?" + - "Is there a mobile app?" + + # Billing + - "What's my subscription status?" + - "How do I cancel my subscription?" + - "Update my payment method" +``` + +#### Quality Checklist + +Before finalizing your golden prompts, verify: + +- [ ] **Coverage**: All major features/use cases included +- [ ] **Diversity**: Different complexity levels (simple, medium, complex) +- [ ] **Realism**: Based on actual user queries from production +- [ ] **Edge Cases**: Unusual but valid inputs included +- [ ] **User Styles**: Formal, casual, technical, verbose variations +- [ ] **Quantity**: 5-15 prompts recommended (start with 5, expand) +- [ ] **Clarity**: Each prompt represents a distinct use case +- [ ] **Relevance**: All prompts are things users would actually send + +#### Iterative Improvement + +**Phase 1: Initial Set (5 prompts)** +```yaml +golden_prompts: + - "Primary use case 1" + - "Primary use case 2" + - "Primary use case 3" + - "Secondary use case 1" + - "Edge case 1" +``` + +**Phase 2: Expand (10 prompts)** +```yaml +# Add variations and more edge cases +golden_prompts: + # ... previous 5 ... 
+ - "Primary use case 1 variation" + - "Primary use case 2 variation" + - "Secondary use case 2" + - "Edge case 2" + - "Edge case 3" +``` + +**Phase 3: Refine (15+ prompts)** +```yaml +# Add based on test results and production data +golden_prompts: + # ... previous 10 ... + - "Real user query from logs" + - "Another production example" + - "Failure case that should work" +``` + +#### Common Mistakes to Avoid + +❌ **Too Generic** +```yaml +# Bad: Too vague +golden_prompts: + - "Help me" + - "Do something" + - "Question" +``` + +✅ **Specific and Actionable** +```yaml +# Good: Clear intent +golden_prompts: + - "Book a flight from NYC to LA" + - "What's my account balance?" + - "Cancel my subscription" +``` + +❌ **Including System Prompts** +```yaml +# Bad: This is a system prompt, not a golden prompt +golden_prompts: + - "You are a helpful assistant that..." +``` + +✅ **User Inputs Only** +```yaml +# Good: Actual user queries +golden_prompts: + - "Book a flight" + - "What's the weather?" +``` + +❌ **Only Happy Path** +```yaml +# Bad: Only perfect inputs +golden_prompts: + - "Book a flight from New York to Los Angeles on March 15th, 2024, economy class, window seat, no meals" +``` + +✅ **Include Variations** +```yaml +# Good: Various input styles +golden_prompts: + - "Book a flight from NYC to LA" + - "I need to fly to Los Angeles" + - "flight booking please" + - "Can you help me book a flight?" +``` + +#### Testing Your Golden Prompts + +Before running FlakeStorm, manually test your golden prompts: + +```bash +# Test each golden prompt manually +curl -X POST http://localhost:8000/invoke \ + -H "Content-Type: application/json" \ + -d '{"input": "Your golden prompt here"}' +``` + +Verify: +- ✅ Agent responds correctly +- ✅ Response time is reasonable +- ✅ No errors occur +- ✅ Response format matches expectations + +If a golden prompt fails manually, fix your agent first, then use it in FlakeStorm.
+ --- ## Troubleshooting diff --git a/flakestorm.yaml.example b/flakestorm.yaml.example index 66f4d19..1f6ba8a 100644 --- a/flakestorm.yaml.example +++ b/flakestorm.yaml.example @@ -6,22 +6,65 @@ version: "1.0" -# Agent Configuration -# Define how flakestorm connects to your agent +# ============================================================================= +# AGENT CONFIGURATION +# ============================================================================= +# Choose the configuration that matches your agent setup: + +# Example 1: Default HTTP format (simple) agent: - # HTTP endpoint that accepts POST requests with {"input": "..."} body endpoint: "http://localhost:8000/invoke" - - # Agent type: "http" | "python" | "langchain" type: "http" - - # Timeout in milliseconds for each agent call timeout: 30000 - - # Optional: Custom headers for HTTP requests + # Optional: Custom headers # headers: # Authorization: "Bearer ${AGENT_API_KEY}" - # Content-Type: "application/json" + +# Example 2: Custom request template (for custom API formats) +# agent: +# endpoint: "http://localhost:8000/api/chat" +# type: "http" +# method: "POST" +# timeout: 30000 +# request_template: | +# {"message": "{prompt}", "stream": false} +# response_path: "$.reply" +# headers: +# Authorization: "Bearer ${API_KEY}" + +# Example 3: Structured input mapping (for agents with structured input) +# agent: +# endpoint: "http://localhost:8000/generate-query" +# type: "http" +# method: "POST" +# timeout: 30000 +# request_template: | +# { +# "industry": "{industry}", +# "productName": "{productName}", +# "businessModel": "{businessModel}", +# "targetMarket": "{targetMarket}", +# "description": "{description}" +# } +# response_path: "$.query" +# parse_structured_input: true + +# Example 4: GET request with query parameters +# agent: +# endpoint: "http://api.example.com/search" +# type: "http" +# method: "GET" +# timeout: 30000 +# request_template: "q={prompt}" +# query_params: +# api_key: "${API_KEY}" +# 
format: "json" + +# Example 5: Python adapter (no HTTP endpoint needed) +# agent: +# endpoint: "my_agent:flakestorm_agent" +# type: "python" +# timeout: 30000 # Model Configuration # The local model used to generate adversarial mutations diff --git a/src/flakestorm/core/config.py b/src/flakestorm/core/config.py index 3696938..774a0e8 100644 --- a/src/flakestorm/core/config.py +++ b/src/flakestorm/core/config.py @@ -31,6 +31,25 @@ class AgentConfig(BaseModel): endpoint: str = Field(..., description="Agent endpoint URL or Python module path") type: AgentType = Field(default=AgentType.HTTP, description="Agent connection type") + method: str = Field( + default="POST", + description="HTTP method (GET, POST, PUT, PATCH, DELETE)", + ) + request_template: str | None = Field( + default=None, + description="Template for request body/query with variable substitution (use {prompt} or {field_name})", + ) + response_path: str | None = Field( + default=None, + description="JSONPath or dot notation to extract response from JSON (e.g., '$.data.result' or 'data.result')", + ) + query_params: dict[str, str] = Field( + default_factory=dict, description="Static query parameters for HTTP requests" + ) + parse_structured_input: bool = Field( + default=True, + description="Whether to parse structured golden prompts into key-value pairs", + ) timeout: int = Field( default=30000, ge=1000, le=300000, description="Timeout in milliseconds" ) @@ -45,12 +64,29 @@ class AgentConfig(BaseModel): # Expand environment variables return os.path.expandvars(v) + @field_validator("method") + @classmethod + def validate_method(cls, v: str) -> str: + """Validate HTTP method.""" + valid_methods = {"GET", "POST", "PUT", "PATCH", "DELETE"} + if v.upper() not in valid_methods: + raise ValueError( + f"Invalid HTTP method: {v}. 
Must be one of {valid_methods}" + ) + return v.upper() + @field_validator("headers") @classmethod def expand_header_env_vars(cls, v: dict[str, str]) -> dict[str, str]: """Expand environment variables in header values.""" return {k: os.path.expandvars(val) for k, val in v.items()} + @field_validator("query_params") + @classmethod + def expand_query_env_vars(cls, v: dict[str, str]) -> dict[str, str]: + """Expand environment variables in query parameter values.""" + return {k: os.path.expandvars(val) for k, val in v.items()} + class ModelConfig(BaseModel): """Configuration for the mutation generation model.""" diff --git a/src/flakestorm/core/protocol.py b/src/flakestorm/core/protocol.py index 8534213..c55d86f 100644 --- a/src/flakestorm/core/protocol.py +++ b/src/flakestorm/core/protocol.py @@ -9,6 +9,8 @@ from __future__ import annotations import asyncio import importlib +import json +import re import time from abc import ABC, abstractmethod from collections.abc import Callable @@ -58,6 +60,140 @@ class AgentProtocol(Protocol): ... +def parse_structured_input(input_text: str) -> dict[str, str]: + """ + Parse structured input text into key-value dictionary. 
+ + Supports formats: + - "Key: Value" + - "Key=Value" + - "Key - Value" + - Multi-line with newlines + + Args: + input_text: Structured text input + + Returns: + Dictionary of parsed key-value pairs (normalized keys) + """ + result: dict[str, str] = {} + lines = input_text.strip().split("\n") + + for line in lines: + line = line.strip() + if not line: + continue + + # Try different separators: ":", "=", " - " + if ":" in line: + parts = line.split(":", 1) + elif "=" in line: + parts = line.split("=", 1) + elif " - " in line: + parts = line.split(" - ", 1) + else: + continue + + if len(parts) == 2: + key = parts[0].strip() + value = parts[1].strip() + + # Normalize key: lowercase, remove spaces/special chars + normalized_key = re.sub(r"[^a-z0-9]", "", key.lower()) + if normalized_key: + result[normalized_key] = value + + return result + + +def render_template( + template: str, prompt: str, structured_data: dict[str, str] | None = None +) -> dict | str: + """ + Render request template with variable substitution. + + Supports: + - {prompt} - Full golden prompt text + - {field_name} - Parsed structured input values + + Args: + template: Template string with {variable} placeholders + prompt: Full golden prompt text + structured_data: Parsed structured input data + + Returns: + Rendered template (dict if JSON, str otherwise) + """ + # Replace {prompt} first + rendered = template.replace("{prompt}", prompt) + + # Replace structured data fields if available + if structured_data: + for key, value in structured_data.items(): + placeholder = f"{{{key}}}" + rendered = rendered.replace(placeholder, value) + + # Try to parse as JSON, return dict if successful + try: + return json.loads(rendered) + except json.JSONDecodeError: + # Not JSON, return as string + return rendered + + +def extract_response(data: dict | list, path: str | None) -> str: + """ + Extract response from JSON using JSONPath or dot notation. 
+ + Supports: + - JSONPath: "$.data.result" + - Dot notation: "data.result" + - Simple key: "result" + + Args: + data: JSON data (dict or list) + path: JSONPath or dot notation path + + Returns: + Extracted response as string + """ + if path is None: + # Fallback to default fields + if isinstance(data, dict): + return data.get("output") or data.get("response") or str(data) + return str(data) + + # Remove leading $ if present (JSONPath style) + path = path.lstrip("$.") + + # Split by dots for nested access + keys = path.split(".") + current: Any = data + + try: + for key in keys: + if isinstance(current, dict): + current = current.get(key) + elif isinstance(current, list): + # Try to use key as index + try: + current = current[int(key)] + except (ValueError, IndexError): + return str(data) + else: + return str(data) + + if current is None: + return str(data) + + return str(current) if current is not None else str(data) + except (KeyError, TypeError, AttributeError): + # Fallback to default extraction + if isinstance(data, dict): + return data.get("output") or data.get("response") or str(data) + return str(data) + + class BaseAgentAdapter(ABC): """Base class for agent adapters.""" @@ -87,16 +223,17 @@ class HTTPAgentAdapter(BaseAgentAdapter): """ Adapter for agents exposed via HTTP endpoints. - Expects the endpoint to accept POST requests with JSON body: - {"input": "user prompt"} - - And return JSON response: - {"output": "agent response"} + Supports flexible request templates, all HTTP methods, and custom response extraction. 
""" def __init__( self, endpoint: str, + method: str = "POST", + request_template: str | None = None, + response_path: str | None = None, + query_params: dict[str, str] | None = None, + parse_structured_input: bool = True, timeout: int = 30000, headers: dict[str, str] | None = None, retries: int = 2, @@ -106,11 +243,21 @@ class HTTPAgentAdapter(BaseAgentAdapter): Args: endpoint: The HTTP endpoint URL + method: HTTP method (GET, POST, PUT, PATCH, DELETE) + request_template: Template for request body/query with variable substitution + response_path: JSONPath or dot notation to extract response + query_params: Static query parameters + parse_structured_input: Whether to parse structured golden prompts timeout: Request timeout in milliseconds headers: Optional custom headers retries: Number of retry attempts """ self.endpoint = endpoint + self.method = method.upper() + self.request_template = request_template + self.response_path = response_path + self.query_params = query_params or {} + self.parse_structured_input = parse_structured_input self.timeout = timeout / 1000 # Convert to seconds self.headers = headers or {} self.retries = retries @@ -124,18 +271,65 @@ class HTTPAgentAdapter(BaseAgentAdapter): for attempt in range(self.retries + 1): try: - response = await client.post( - self.endpoint, - json={"input": input}, - headers=self.headers, - ) + # 1. Parse structured input if enabled + structured_data = None + if self.parse_structured_input: + structured_data = parse_structured_input(input) + + # 2. Render request template + if self.request_template: + rendered = render_template( + self.request_template, input, structured_data + ) + request_data = rendered + else: + # Default format + request_data = {"input": input} + + # 3. 
Build request based on method + if self.method in ["GET", "DELETE"]: + # Query params only (merge template data as query params) + if isinstance(request_data, dict): + params = {**self.query_params, **request_data} + else: + # If template rendered to string, use as query string + params = {**self.query_params} + if request_data: + params["q"] = str(request_data) + + response = await client.request( + self.method, + self.endpoint, + params=params, + headers=self.headers, + ) + else: + # POST, PUT, PATCH: Body + optional query params + if isinstance(request_data, dict): + response = await client.request( + self.method, + self.endpoint, + json=request_data, + params=self.query_params, + headers=self.headers, + ) + else: + # String body (e.g., for form data) + response = await client.request( + self.method, + self.endpoint, + content=str(request_data), + params=self.query_params, + headers=self.headers, + ) + response.raise_for_status() latency_ms = (time.perf_counter() - start_time) * 1000 data = response.json() - # Handle different response formats - output = data.get("output") or data.get("response") or str(data) + # 4. 
Extract response using response_path + output = extract_response(data, self.response_path) return AgentResponse( output=output, @@ -308,6 +502,11 @@ def create_agent_adapter(config: AgentConfig) -> BaseAgentAdapter: if config.type == AgentType.HTTP: return HTTPAgentAdapter( endpoint=config.endpoint, + method=config.method, + request_template=config.request_template, + response_path=config.response_path, + query_params=config.query_params, + parse_structured_input=config.parse_structured_input, timeout=config.timeout, headers=config.headers, ) diff --git a/tests/test_adapters.py b/tests/test_adapters.py index e699542..f97948a 100644 --- a/tests/test_adapters.py +++ b/tests/test_adapters.py @@ -178,3 +178,288 @@ class TestAgentResponse: error="Failed", ) assert error_response.success is False + + +class TestStructuredInputParser: + """Tests for structured input parsing.""" + + def test_parse_colon_format(self): + """Test parsing key:value format.""" + from flakestorm.core.protocol import parse_structured_input + + input_text = "Industry: Fitness tech\nProduct: AI trainer" + result = parse_structured_input(input_text) + + assert result["industry"] == "Fitness tech" + assert result["product"] == "AI trainer" + + def test_parse_equals_format(self): + """Test parsing key=value format.""" + from flakestorm.core.protocol import parse_structured_input + + input_text = "Industry=Fitness tech\nProduct=AI trainer" + result = parse_structured_input(input_text) + + assert result["industry"] == "Fitness tech" + assert result["product"] == "AI trainer" + + def test_parse_dash_format(self): + """Test parsing key - value format.""" + from flakestorm.core.protocol import parse_structured_input + + input_text = "Industry - Fitness tech\nProduct - AI trainer" + result = parse_structured_input(input_text) + + assert result["industry"] == "Fitness tech" + assert result["product"] == "AI trainer" + + def test_parse_multiline(self): + """Test parsing multi-line structured input.""" + from 
flakestorm.core.protocol import parse_structured_input + + input_text = """ + Industry: Fitness tech + Product/Service: AI personal trainer app + Business Model: B2C + Target Market: fitness enthusiasts + Description: An app that provides personalized workout plans + """ + result = parse_structured_input(input_text) + + assert result["industry"] == "Fitness tech" + assert result["productservice"] == "AI personal trainer app" + assert result["businessmodel"] == "B2C" + assert result["targetmarket"] == "fitness enthusiasts" + assert ( + result["description"] == "An app that provides personalized workout plans" + ) + + def test_parse_empty_input(self): + """Test parsing empty input.""" + from flakestorm.core.protocol import parse_structured_input + + result = parse_structured_input("") + assert result == {} + + def test_parse_normalizes_keys(self): + """Test that keys are normalized (lowercase, no spaces).""" + from flakestorm.core.protocol import parse_structured_input + + input_text = "Product Name: AI Trainer\nBusiness-Model: B2C" + result = parse_structured_input(input_text) + + # Keys should be normalized + assert "productname" in result + assert "businessmodel" in result + + +class TestTemplateEngine: + """Tests for template rendering.""" + + def test_render_simple_template(self): + """Test rendering template with {prompt}.""" + from flakestorm.core.protocol import render_template + + template = '{"message": "{prompt}"}' + prompt = "Book a flight" + result = render_template(template, prompt) + + assert result == {"message": "Book a flight"} + + def test_render_with_structured_data(self): + """Test rendering template with structured data fields.""" + from flakestorm.core.protocol import render_template + + template = '{"industry": "{industry}", "product": "{productname}"}' + prompt = "test" + structured_data = {"industry": "Fitness tech", "productname": "AI trainer"} + + result = render_template(template, prompt, structured_data) + + assert result == {"industry": 
"Fitness tech", "product": "AI trainer"} + + def test_render_json_template(self): + """Test rendering JSON template.""" + from flakestorm.core.protocol import render_template + + template = '{"messages": [{"role": "user", "content": "{prompt}"}]}' + prompt = "Hello" + result = render_template(template, prompt) + + assert isinstance(result, dict) + assert result["messages"][0]["content"] == "Hello" + + def test_render_string_template(self): + """Test rendering non-JSON template.""" + from flakestorm.core.protocol import render_template + + template = "q={prompt}&format=json" + prompt = "search query" + result = render_template(template, prompt) + + assert result == "q=search query&format=json" + + +class TestResponseExtractor: + """Tests for response extraction.""" + + def test_extract_simple_key(self): + """Test extracting simple key from response.""" + from flakestorm.core.protocol import extract_response + + data = {"output": "Hello world"} + result = extract_response(data, "output") + + assert result == "Hello world" + + def test_extract_dot_notation(self): + """Test extracting nested field using dot notation.""" + from flakestorm.core.protocol import extract_response + + data = {"data": {"result": "Success"}} + result = extract_response(data, "data.result") + + assert result == "Success" + + def test_extract_jsonpath(self): + """Test extracting using JSONPath-style notation.""" + from flakestorm.core.protocol import extract_response + + data = {"data": {"result": "Success"}} + result = extract_response(data, "$.data.result") + + assert result == "Success" + + def test_extract_default_fallback(self): + """Test default extraction when path is None.""" + from flakestorm.core.protocol import extract_response + + data = {"output": "Hello"} + result = extract_response(data, None) + + assert result == "Hello" + + def test_extract_fallback_to_response(self): + """Test fallback to 'response' key.""" + from flakestorm.core.protocol import extract_response + + data = 
{"response": "Hello"} + result = extract_response(data, None) + + assert result == "Hello" + + def test_extract_missing_path(self): + """Test extraction with missing path falls back to default.""" + from flakestorm.core.protocol import extract_response + + data = {"output": "Hello"} + result = extract_response(data, "nonexistent.path") + + # Should fall back to default extraction + assert result == "Hello" + + +class TestHTTPAgentAdapterNewFeatures: + """Tests for new HTTP adapter features.""" + + def test_adapter_with_method(self): + """Test adapter creation with custom HTTP method.""" + from flakestorm.core.protocol import HTTPAgentAdapter + + adapter = HTTPAgentAdapter( + endpoint="http://localhost:8000/api", + method="GET", + ) + assert adapter.method == "GET" + + def test_adapter_with_request_template(self): + """Test adapter creation with request template.""" + from flakestorm.core.protocol import HTTPAgentAdapter + + template = '{"message": "{prompt}"}' + adapter = HTTPAgentAdapter( + endpoint="http://localhost:8000/api", + request_template=template, + ) + assert adapter.request_template == template + + def test_adapter_with_response_path(self): + """Test adapter creation with response path.""" + from flakestorm.core.protocol import HTTPAgentAdapter + + adapter = HTTPAgentAdapter( + endpoint="http://localhost:8000/api", + response_path="$.data.result", + ) + assert adapter.response_path == "$.data.result" + + def test_adapter_with_query_params(self): + """Test adapter creation with query parameters.""" + from flakestorm.core.protocol import HTTPAgentAdapter + + query_params = {"api_key": "test", "format": "json"} + adapter = HTTPAgentAdapter( + endpoint="http://localhost:8000/api", + query_params=query_params, + ) + assert adapter.query_params == query_params + + def test_adapter_parse_structured_input_flag(self): + """Test adapter with parse_structured_input flag.""" + from flakestorm.core.protocol import HTTPAgentAdapter + + adapter = HTTPAgentAdapter( + 
endpoint="http://localhost:8000/api", + parse_structured_input=False, + ) + assert adapter.parse_structured_input is False + + def test_adapter_all_new_features(self): + """Test adapter with all new features combined.""" + from flakestorm.core.protocol import HTTPAgentAdapter + + adapter = HTTPAgentAdapter( + endpoint="http://localhost:8000/api", + method="PUT", + request_template='{"content": "{prompt}"}', + response_path="$.result", + query_params={"version": "v1"}, + parse_structured_input=True, + timeout=60000, + headers={"Authorization": "Bearer token"}, + ) + + assert adapter.method == "PUT" + assert adapter.request_template == '{"content": "{prompt}"}' + assert adapter.response_path == "$.result" + assert adapter.query_params == {"version": "v1"} + assert adapter.parse_structured_input is True + assert adapter.timeout == 60.0 + assert adapter.headers == {"Authorization": "Bearer token"} + + +class TestAgentAdapterFactoryNewFeatures: + """Tests for factory with new config fields.""" + + def test_factory_passes_all_fields(self): + """Test factory passes all new config fields to HTTP adapter.""" + from flakestorm.core.config import AgentConfig, AgentType + from flakestorm.core.protocol import HTTPAgentAdapter, create_agent_adapter + + config = AgentConfig( + endpoint="http://localhost:8000/api", + type=AgentType.HTTP, + method="POST", + request_template='{"message": "{prompt}"}', + response_path="$.reply", + query_params={"key": "value"}, + parse_structured_input=True, + ) + + adapter = create_agent_adapter(config) + assert isinstance(adapter, HTTPAgentAdapter) + assert adapter.method == "POST" + assert adapter.request_template == '{"message": "{prompt}"}' + assert adapter.response_path == "$.reply" + assert adapter.query_params == {"key": "value"} + assert adapter.parse_structured_input is True