plano/tests/rest/api_llm_gateway.rest

@llm_endpoint = http://localhost:12000
@openai_endpoint = https://api.openai.com
@access_key = {{$dotenv OPENAI_API_KEY}}

POST {{llm_endpoint}}/v1/chat/completions HTTP/1.1
content-type: application/json
authorization: Bearer
accept: */*
accept-encoding: deflate
user-agent: Python/3.11 aiohttp/3.11.11
content-length: 876
x-forwarded-proto: https
x-request-id: 99d7817d-a646-9497-a38d-710b1ce1325f
traceparent: 00-e4c9fc8cf9fc7714c6a15ef34852fb30-573a351a98e0cd01-01
tracestate:
x-arch-llm-provider-hint: gpt-4o-mini


{
  "model": "gpt-4o-mini",
  "messages": [
    {
      "role": "user",
      "content": "### Task:\nGenerate 1-3 broad tags categorizing the main themes of the chat history, along with 1-3 more specific subtopic tags.\n\n### Guidelines:\n- Start with high-level domains (e.g. Science, Technology, Philosophy, Arts, Politics, Business, Health, Sports, Entertainment, Education)\n- Consider including relevant subfields/subdomains if they are strongly represented throughout the conversation\n- If content is too short (less than 3 messages) or too diverse, use only [\"General\"]\n- Use the chat's primary language; default to English if multilingual\n- Prioritize accuracy over specificity\n\n### Output:\nJSON format: { \"tags\": [\"tag1\", \"tag2\", \"tag3\"] }\n\n### Chat History:\n<chat_history>\nUSER: hello\nASSISTANT: Hello! How can I assist you today?\n</chat_history>"
    }
  ],
  "stream": false
}

### test
POST {{llm_endpoint}}/v1/chat/completions HTTP/1.1
Content-Type: application/json
Authorization: Bearer {{access_key}}

{
  "messages": [
    {
      "role": "user",
      "content": "hello"
    }
  ],
  "model": "gpt-4o-mini",
  "stream": false
}

### openai request (streaming)
POST {{openai_endpoint}}/v1/chat/completions HTTP/1.1
Content-Type: application/json
Authorization: Bearer {{access_key}}

{
  "messages": [
    {
      "role": "user",
      "content": "hello"
    }
  ],
  "model": "gpt-4o-mini",
  "stream": true
}


### llm gateway request
POST {{llm_endpoint}}/v1/chat/completions HTTP/1.1
Content-Type: application/json

{
  "messages": [
    {
      "role": "user",
      "content": "hello"
    }
  ]
}

### llm gateway request (streaming)
POST {{llm_endpoint}}/v1/chat/completions HTTP/1.1
Content-Type: application/json

{
  "messages": [
    {
      "role": "user",
      "content": "hello"
    }
  ],
  "stream": true
}

### llm gateway request (provider hint)
POST {{llm_endpoint}}/v1/chat/completions HTTP/1.1
Content-Type: application/json
x-arch-llm-provider-hint: gpt-3.5-turbo-0125

{
  "messages": [
    {
      "role": "user",
      "content": "hello"
    }
  ]
}

### llm gateway request with function calling (default target)
POST {{llm_endpoint}}/v1/chat/completions HTTP/1.1
Content-Type: application/json

{
  "stream": true,
  "model": "gpt-4o",
  "messages": [
    {
      "role": "user",
      "content": "how is the weather in seattle"
    }
  ],
  "tools": [
    {
      "type": "function",
      "function": {
        "name": "get_current_weather",
        "description": "Get current weather at a location.",
        "parameters": {
          "type": "object",
          "properties": {
            "location": {
              "type": "string",
              "description": "The location to get the weather for",
              "format": "City, State"
            },
            "unit": {
              "type": "string",
              "description": "The unit to return the weather in.",
              "enum": ["celsius", "fahrenheit"],
              "default": "celsius"
            },
            "days": {
              "type": "string",
              "description": "The number of days for the request."
            }
          },
          "required": ["location", "days"]
        }
      }
    }
  ]
}