# Manual requests for exercising the chat-completions API, either through the
# local LLM gateway ({{llm_endpoint}}) or directly against OpenAI
# ({{openai_endpoint}}). Each "###" line starts a separately runnable request.

# Base URL of the gateway under test.
@llm_endpoint = http://localhost:12000
# Base URL used by the direct-to-OpenAI request.
@openai_endpoint = https://api.openai.com
# API key read from the OPENAI_API_KEY entry of the .env file.
@access_key = {{$dotenv OPENAI_API_KEY}}

### llm gateway request (full header set, tag-generation prompt)
# NOTE(review): the header set looks like it was copied from a Python/aiohttp
# client (see user-agent) -- confirm before relying on the trace ids below.
# No content-length header is sent on purpose: the client derives it from the
# body, and a hand-maintained value goes stale whenever the body is edited.
POST {{llm_endpoint}}/v1/chat/completions HTTP/1.1
content-type: application/json
authorization: Bearer {{access_key}}
accept: */*
accept-encoding: deflate
user-agent: Python/3.11 aiohttp/3.11.11
x-forwarded-proto: https
x-request-id: 99d7817d-a646-9497-a38d-710b1ce1325f
traceparent: 00-e4c9fc8cf9fc7714c6a15ef34852fb30-573a351a98e0cd01-01
tracestate:
x-arch-llm-provider-hint: gpt-4o-mini

{
  "model": "gpt-4o-mini",
  "messages": [
    {
      "role": "user",
      "content": "### Task:\nGenerate 1-3 broad tags categorizing the main themes of the chat history, along with 1-3 more specific subtopic tags.\n\n### Guidelines:\n- Start with high-level domains (e.g. Science, Technology, Philosophy, Arts, Politics, Business, Health, Sports, Entertainment, Education)\n- Consider including relevant subfields/subdomains if they are strongly represented throughout the conversation\n- If content is too short (less than 3 messages) or too diverse, use only [\"General\"]\n- Use the chat's primary language; default to English if multilingual\n- Prioritize accuracy over specificity\n\n### Output:\nJSON format: { \"tags\": [\"tag1\", \"tag2\", \"tag3\"] }\n\n### Chat History:\n\nUSER: hello\nASSISTANT: Hello! How can I assist you today?\n"
    }
  ],
  "stream": false
}

### test
# Minimal non-streaming request to the gateway with an explicit bearer token.
POST {{llm_endpoint}}/v1/chat/completions HTTP/1.1
Content-Type: application/json
Authorization: Bearer {{access_key}}

{
  "messages": [
    {
      "role": "user",
      "content": "hello"
    }
  ],
  "model": "gpt-4o-mini",
  "stream": false
}

### openai request (streaming)
# Same prompt sent straight to OpenAI with "stream": true.
POST {{openai_endpoint}}/v1/chat/completions HTTP/1.1
Content-Type: application/json
Authorization: Bearer {{access_key}}

{
  "messages": [
    {
      "role": "user",
      "content": "hello"
    }
  ],
  "model": "gpt-4o-mini",
  "stream": true
}

### llm gateway request
# No model, no Authorization header and no provider hint are sent.
POST {{llm_endpoint}}/v1/chat/completions HTTP/1.1
Content-Type: application/json

{
  "messages": [
    {
      "role": "user",
      "content": "hello"
    }
  ]
}

### llm gateway request (streaming)
# As above, but with "stream": true.
POST {{llm_endpoint}}/v1/chat/completions HTTP/1.1
Content-Type: application/json

{
  "messages": [
    {
      "role": "user",
      "content": "hello"
    }
  ],
  "stream": true
}

### llm gateway request (provider hint)
# Sends the x-arch-llm-provider-hint header instead of a "model" field.
POST {{llm_endpoint}}/v1/chat/completions HTTP/1.1
Content-Type: application/json
x-arch-llm-provider-hint: gpt-3.5-turbo-0125

{
  "messages": [
    {
      "role": "user",
      "content": "hello"
    }
  ]
}

### llm gateway request with function calling (default target)
# Streaming request that offers a single get_current_weather tool.
POST {{llm_endpoint}}/v1/chat/completions HTTP/1.1
Content-Type: application/json

{
  "stream": true,
  "model": "gpt-4o",
  "messages": [
    {
      "role": "user",
      "content": "how is the weather in seattle"
    }
  ],
  "tools": [
    {
      "type": "function",
      "function": {
        "name": "get_current_weather",
        "description": "Get current weather at a location.",
        "parameters": {
          "type": "object",
          "properties": {
            "location": {
              "type": "string",
              "description": "The location to get the weather for",
              "format": "City, State"
            },
            "unit": {
              "type": "string",
              "description": "The unit to return the weather in.",
              "enum": ["celsius", "fahrenheit"],
              "default": "celsius"
            },
            "days": {
              "type": "string",
              "description": "The number of days for the request."
            }
          },
          "required": ["location", "days"]
        }
      }
    }
  ]
}