Merge branch 'main' of https://github.com/katanemo/arch into cotran/prefill

2026-06-17 15:25:17 +02:00 · 2024-11-04 15:53:25 -08:00 · 2024-11-04 15:53:25 -08:00 · 0d9cbdebda
commit 0d9cbdebda
parent 0910fcdcfa 9a5c5cc3a3
7 changed files with 210 additions and 6 deletions
--- a/README.md
+++ b/README.md
@ -1,7 +1,4 @@
-
-<p>
-  <img src="docs/source/_static/img/arch-logo.png" alt="Arch Gateway Logo" title="Arch Gateway Logo" height="350">
-</p>
+![Arch Gateway Logo](image.png)

 [![pre-commit](https://github.com/katanemo/arch/actions/workflows/pre-commit.yml/badge.svg)](https://github.com/katanemo/arch/actions/workflows/pre-commit.yml)
 [![rust tests (prompt and llm gateway)](https://github.com/katanemo/arch/actions/workflows/rust_tests.yml/badge.svg)](https://github.com/katanemo/arch/actions/workflows/rust_tests.yml)
--- a/api_llm_gateway.rest
+++ b/api_llm_gateway.rest
@ -0,0 +1,76 @@
+@llm_endpoint = http://localhost:12000
+@openai_endpoint = https://api.openai.com
+@access_key = {{$dotenv OPENAI_API_KEY}}
+
+### openai request
+POST {{openai_endpoint}}/v1/chat/completions HTTP/1.1
+Content-Type: application/json
+Authorization: Bearer {{access_key}}
+
+{
+  "messages": [
+    {
+      "role": "user",
+      "content": "hello"
+    }
+  ],
+  "model": "gpt-4o-mini"
+}
+
+### openai request (streaming)
+POST {{openai_endpoint}}/v1/chat/completions HTTP/1.1
+Content-Type: application/json
+Authorization: Bearer {{access_key}}
+
+{
+  "messages": [
+    {
+      "role": "user",
+      "content": "hello"
+    }
+  ],
+  "model": "gpt-4o-mini",
+  "stream": true
+}
+
+
+### llm gateway request
+POST {{llm_endpoint}}/v1/chat/completions HTTP/1.1
+Content-Type: application/json
+
+{
+  "messages": [
+    {
+      "role": "user",
+      "content": "hello"
+    }
+  ]
+}
+
+### llm gateway request (streaming)
+POST {{llm_endpoint}}/v1/chat/completions HTTP/1.1
+Content-Type: application/json
+
+{
+  "messages": [
+    {
+      "role": "user",
+      "content": "hello"
+    }
+  ],
+  "stream": true
+}
+
+### llm gateway request (provider hint)
+POST {{llm_endpoint}}/v1/chat/completions HTTP/1.1
+Content-Type: application/json
+x-arch-llm-provider-hint: gpt-3.5-turbo-0125
+
+{
+  "messages": [
+    {
+      "role": "user",
+      "content": "hello"
+    }
+  ]
+}
--- a/api_model_server.rest
+++ b/api_model_server.rest
@ -0,0 +1,44 @@
+@model_server_endpoint = http://localhost:51000
+@archfc_endpoint = https://api.fc.archgw.com
+
+### talk to model_server for completion
+POST {{model_server_endpoint}}/v1/chat/completions HTTP/1.1
+Content-Type: application/json
+
+{
+  "messages": [
+    {
+      "role": "user",
+      "content": "how is the weather in seattle for next 10 days"
+    }
+  ],
+  "tools": [
+    {
+        "id": "weather-112",
+        "tool_type": "function",
+        "function": {
+          "name": "weather_forecast",
+          "arguments": {"city": "str", "days": "int"}
+        }
+    }
+  ]
+}
+
+
+### talk to arch_fc directly for completion
+POST {{archfc_endpoint}}/v1/chat/completions HTTP/1.1
+Content-Type: application/json
+
+{
+  "model": "Arch-Function",
+  "messages": [
+    {
+      "role": "system",
+      "content": "You are a helpful assistant.\n\n# Tools\n\nYou may call one or more functions to assist with the user query.\n\nYou are provided with function signatures within <tools></tools> XML tags:\n<tools>\n{\"id\": \"weather-112\", \"tool_type\": \"function\", \"function\": {\"name\": \"weather_forecast\", \"arguments\": {\"city\": \"str\", \"days\": \"int\"}}}\n</tools>\n\nFor each function call, return a json object with function name and arguments within <tool_call></tool_call> XML tags:\n<tool_call>\n{\"name\": <function-name>, \"arguments\": <args-json-object>}\n</tool_call>"
+    },
+    { "role": "user", "content": "how is the weather in seattle?" },
+    { "role": "assistant", "content": "Of course! " }
+  ],
+  "continue_final_message": true,
+  "add_generation_prompt": false
+}
--- a/api_prompt_gateway.rest
+++ b/api_prompt_gateway.rest
@ -0,0 +1,87 @@
+@prompt_endpoint = http://localhost:10000
+
+### prompt gateway request
+POST {{prompt_endpoint}}/v1/chat/completions HTTP/1.1
+Content-Type: application/json
+
+{
+  "messages": [
+    {
+      "role": "user",
+      "content": "how is the weather in seattle for next 10 days"
+    }
+  ]
+}
+
+### prompt gateway request (streaming)
+POST {{prompt_endpoint}}/v1/chat/completions HTTP/1.1
+Content-Type: application/json
+
+{
+  "messages": [
+    {
+      "role": "user",
+      "content": "how is the weather in seattle for next 10 days"
+    }
+  ],
+  "stream": true
+}
+
+
+### prompt gateway request param gathering
+POST {{prompt_endpoint}}/v1/chat/completions HTTP/1.1
+Content-Type: application/json
+
+{
+  "messages": [
+    {
+      "role": "user",
+      "content": "how is the weather in seattle"
+    }
+  ]
+}
+
+### prompt gateway request param gathering and function calling
+POST {{prompt_endpoint}}/v1/chat/completions HTTP/1.1
+Content-Type: application/json
+
+{
+  "messages": [
+    {
+      "role": "user",
+      "content": "how is the weather in seattle"
+    },
+    {
+      "role": "assistant",
+      "content": "It seems I'm missing some information. Could you provide the following details days ?",
+      "model": "Arch-Function-1.5b"
+    },
+    {
+      "role": "user",
+      "content": "for next 10 days"
+    }
+  ]
+}
+
+### prompt gateway request param gathering and function calling (streaming)
+POST {{prompt_endpoint}}/v1/chat/completions HTTP/1.1
+Content-Type: application/json
+
+{
+  "messages": [
+    {
+      "role": "user",
+      "content": "how is the weather in seattle"
+    },
+    {
+      "role": "assistant",
+      "content": "It seems I'm missing some information. Could you provide the following details days ?",
+      "model": "Arch-Function-1.5b"
+    },
+    {
+      "role": "user",
+      "content": "for next 10 days"
+    }
+  ],
+  "stream": true
+}
--- a/demos/hr_agent/arch_config.yaml
+++ b/demos/hr_agent/arch_config.yaml
@ -18,7 +18,7 @@ endpoints:
    # value could be ip address or a hostname with port
    # this could also be a list of endpoints for load balancing
    # for example endpoint: [ ip1:port, ip2:port ]
-    endpoint: host.docker.internal:18083
+    endpoint: host.docker.internal:18080
    # max time to wait for a connection to be established
    connect_timeout: 0.005s

--- a/demos/network_agent/arch_config.yaml
+++ b/demos/network_agent/arch_config.yaml
@ -52,6 +52,6 @@ endpoints:
    # value could be ip address or a hostname with port
    # this could also be a list of endpoints for load balancing
    # for example endpoint: [ ip1:port, ip2:port ]
-    endpoint: host.docker.internal:18083
+    endpoint: host.docker.internal:18080
    # max time to wait for a connection to be established
    connect_timeout: 0.005s
--- a/image.png
+++ b/image.png