diff --git a/README.md b/README.md index a59f88c2..a06316bd 100644 --- a/README.md +++ b/README.md @@ -1,7 +1,4 @@ - -

- Arch Gateway Logo -

+![Arch Gateway Logo](image.png) [![pre-commit](https://github.com/katanemo/arch/actions/workflows/pre-commit.yml/badge.svg)](https://github.com/katanemo/arch/actions/workflows/pre-commit.yml) [![rust tests (prompt and llm gateway)](https://github.com/katanemo/arch/actions/workflows/rust_tests.yml/badge.svg)](https://github.com/katanemo/arch/actions/workflows/rust_tests.yml) diff --git a/api_llm_gateway.rest b/api_llm_gateway.rest new file mode 100644 index 00000000..b40c229b --- /dev/null +++ b/api_llm_gateway.rest @@ -0,0 +1,76 @@ +@llm_endpoint = http://localhost:12000 +@openai_endpoint = https://api.openai.com +@access_key = {{$dotenv OPENAI_API_KEY}} + +### openai request +POST {{openai_endpoint}}/v1/chat/completions HTTP/1.1 +Content-Type: application/json +Authorization: Bearer {{access_key}} + +{ + "messages": [ + { + "role": "user", + "content": "hello" + } + ], + "model": "gpt-4o-mini" +} + +### openai request (streaming) +POST {{openai_endpoint}}/v1/chat/completions HTTP/1.1 +Content-Type: application/json +Authorization: Bearer {{access_key}} + +{ + "messages": [ + { + "role": "user", + "content": "hello" + } + ], + "model": "gpt-4o-mini", + "stream": true +} + + +### llm gateway request +POST {{llm_endpoint}}/v1/chat/completions HTTP/1.1 +Content-Type: application/json + +{ + "messages": [ + { + "role": "user", + "content": "hello" + } + ] +} + +### llm gateway request (streaming) +POST {{llm_endpoint}}/v1/chat/completions HTTP/1.1 +Content-Type: application/json + +{ + "messages": [ + { + "role": "user", + "content": "hello" + } + ], + "stream": true +} + +### llm gateway request (provider hint) +POST {{llm_endpoint}}/v1/chat/completions HTTP/1.1 +Content-Type: application/json +x-arch-llm-provider-hint: gpt-3.5-turbo-0125 + +{ + "messages": [ + { + "role": "user", + "content": "hello" + } + ] +} diff --git a/api_model_server.rest b/api_model_server.rest new file mode 100644 index 00000000..9102786a --- /dev/null +++ b/api_model_server.rest @@ -0,0 +1,44 @@ 
+@model_server_endpoint = http://localhost:51000 +@archfc_endpoint = https://api.fc.archgw.com + +### talk to model_server for completion +POST {{model_server_endpoint}}/v1/chat/completions HTTP/1.1 +Content-Type: application/json + +{ + "messages": [ + { + "role": "user", + "content": "how is the weather in seattle for next 10 days" + } + ], + "tools": [ + { + "id": "weather-112", + "tool_type": "function", + "function": { + "name": "weather_forecast", + "arguments": {"city": "str", "days": "int"} + } + } + ] +} + + +### talk to arch_fc directly for completion +POST {{archfc_endpoint}}/v1/chat/completions HTTP/1.1 +Content-Type: application/json + +{ + "model": "Arch-Function", + "messages": [ + { + "role": "system", + "content": "You are a helpful assistant.\n\n# Tools\n\nYou may call one or more functions to assist with the user query.\n\nYou are provided with function signatures within <tools></tools> XML tags:\n<tools>\n{\"id\": \"weather-112\", \"tool_type\": \"function\", \"function\": {\"name\": \"weather_forecast\", \"arguments\": {\"city\": \"str\", \"days\": \"int\"}}}\n</tools>\n\nFor each function call, return a json object with function name and arguments within <tool_call></tool_call> XML tags:\n<tool_call>\n{\"name\": <function-name>, \"arguments\": <args-json-object>}\n</tool_call>" + }, + { "role": "user", "content": "how is the weather in seattle?" }, + { "role": "assistant", "content": "Of course! 
" } + ], + "continue_final_message": true, + "add_generation_prompt": false +} diff --git a/api_prompt_gateway.rest b/api_prompt_gateway.rest new file mode 100644 index 00000000..b79b4230 --- /dev/null +++ b/api_prompt_gateway.rest @@ -0,0 +1,87 @@ +@prompt_endpoint = http://localhost:10000 + +### prompt gateway request +POST {{prompt_endpoint}}/v1/chat/completions HTTP/1.1 +Content-Type: application/json + +{ + "messages": [ + { + "role": "user", + "content": "how is the weather in seattle for next 10 days" + } + ] +} + +### prompt gateway request (streaming) +POST {{prompt_endpoint}}/v1/chat/completions HTTP/1.1 +Content-Type: application/json + +{ + "messages": [ + { + "role": "user", + "content": "how is the weather in seattle for next 10 days" + } + ], + "stream": true +} + + +### prompt gateway request param gathering +POST {{prompt_endpoint}}/v1/chat/completions HTTP/1.1 +Content-Type: application/json + +{ + "messages": [ + { + "role": "user", + "content": "how is the weather in seattle" + } + ] +} + +### prompt gateway request param gathering and function calling +POST {{prompt_endpoint}}/v1/chat/completions HTTP/1.1 +Content-Type: application/json + +{ + "messages": [ + { + "role": "user", + "content": "how is the weather in seattle" + }, + { + "role": "assistant", + "content": "It seems I'm missing some information. Could you provide the following details days ?", + "model": "Arch-Function-1.5b" + }, + { + "role": "user", + "content": "for next 10 days" + } + ] +} + +### prompt gateway request param gathering and function calling (streaming) +POST {{prompt_endpoint}}/v1/chat/completions HTTP/1.1 +Content-Type: application/json + +{ + "messages": [ + { + "role": "user", + "content": "how is the weather in seattle" + }, + { + "role": "assistant", + "content": "It seems I'm missing some information. 
Could you provide the following details days ?", + "model": "Arch-Function-1.5b" + }, + { + "role": "user", + "content": "for next 10 days" + } + ], + "stream": true +} diff --git a/demos/hr_agent/arch_config.yaml b/demos/hr_agent/arch_config.yaml index c2c5fdca..ea0cfa8f 100644 --- a/demos/hr_agent/arch_config.yaml +++ b/demos/hr_agent/arch_config.yaml @@ -18,7 +18,7 @@ endpoints: # value could be ip address or a hostname with port # this could also be a list of endpoints for load balancing # for example endpoint: [ ip1:port, ip2:port ] - endpoint: host.docker.internal:18083 + endpoint: host.docker.internal:18080 # max time to wait for a connection to be established connect_timeout: 0.005s diff --git a/demos/network_agent/arch_config.yaml b/demos/network_agent/arch_config.yaml index e2945e9a..7b0d1903 100644 --- a/demos/network_agent/arch_config.yaml +++ b/demos/network_agent/arch_config.yaml @@ -52,6 +52,6 @@ endpoints: # value could be ip address or a hostname with port # this could also be a list of endpoints for load balancing # for example endpoint: [ ip1:port, ip2:port ] - endpoint: host.docker.internal:18083 + endpoint: host.docker.internal:18080 # max time to wait for a connection to be established connect_timeout: 0.005s diff --git a/image.png b/image.png new file mode 100644 index 00000000..d97bfe11 Binary files /dev/null and b/image.png differ