diff --git a/.github/workflows/e2e_tests.yml b/.github/workflows/e2e_tests.yml
index 5750ca74..5988c27e 100644
--- a/.github/workflows/e2e_tests.yml
+++ b/.github/workflows/e2e_tests.yml
@@ -14,6 +14,32 @@ jobs:
       - name: Checkout code
         uses: actions/checkout@v3
 
+      # --- Disk inspection & cleanup section (added to free space on GitHub runner) ---
+      - name: Check disk usage before cleanup
+        run: |
+          echo "=== Disk usage before cleanup ==="
+          df -h
+          echo "=== Repo size ==="
+          du -sh .
+
+      - name: Free disk space on runner
+        run: |
+          echo "=== Cleaning preinstalled SDKs and toolchains to free space ==="
+          sudo rm -rf /usr/share/dotnet
+          sudo rm -rf /usr/local/lib/android
+          sudo rm -rf /opt/ghc
+          # If you still hit disk issues, uncomment this to free more space.
+          # It just removes cached tool versions; setup-python will re-download what it needs.
+          # sudo rm -rf /opt/hostedtoolcache || true
+
+          echo "=== Docker cleanup (before our builds/compose) ==="
+          docker system prune -af || true
+          docker volume prune -f || true
+
+          echo "=== Disk usage after cleanup ==="
+          df -h
+      # --- End disk cleanup section ---
+
       - name: Set up Python
         uses: actions/setup-python@v4
         with:
diff --git a/arch/envoy.template.yaml b/arch/envoy.template.yaml
index ae9d0fbc..b1c4f487 100644
--- a/arch/envoy.template.yaml
+++ b/arch/envoy.template.yaml
@@ -262,19 +262,16 @@ static_resources:
                       domains:
                         - "*"
                       routes:
-                        {% for internal_cluster in ["arch_fc", "model_server"] %}
                         - match:
                             prefix: "/"
                             headers:
                               - name: "x-arch-upstream"
                                 string_match:
-                                  exact: {{ internal_cluster }}
+                                  exact: bright_staff
                           route:
                             auto_host_rewrite: true
-                            cluster: {{ internal_cluster }}
+                            cluster: bright_staff
                             timeout: 300s
-                        {% endfor %}
-
                         {% for cluster_name, cluster in arch_clusters.items() %}
                         - match:
                             prefix: "/"
@@ -599,7 +596,7 @@ static_resources:
   clusters:
 
     - name: arch
-      connect_timeout: 0.5s
+      connect_timeout: 5s
       type: LOGICAL_DNS
       dns_lookup_family: V4_ONLY
       lb_policy: ROUND_ROBIN
@@ -868,24 +865,6 @@ static_resources:
             tls_params:
               tls_minimum_protocol_version: TLSv1_2
               tls_maximum_protocol_version: TLSv1_3
-
-    {% for internal_cluster in ["arch_fc", "model_server"] %}
-    - name: {{ internal_cluster }}
-      connect_timeout: 0.5s
-      type: STRICT_DNS
-      dns_lookup_family: V4_ONLY
-      lb_policy: ROUND_ROBIN
-      load_assignment:
-        cluster_name: {{ internal_cluster }}
-        endpoints:
-          - lb_endpoints:
-              - endpoint:
-                  address:
-                    socket_address:
-                      address: host.docker.internal
-                      port_value: 51000
-                  hostname: {{ internal_cluster }}
-    {% endfor %}
     - name: mistral_7b_instruct
       connect_timeout: 0.5s
       type: STRICT_DNS
diff --git a/arch/tools/cli/config_generator.py b/arch/tools/cli/config_generator.py
index ead0a351..fd306c60 100644
--- a/arch/tools/cli/config_generator.py
+++ b/arch/tools/cli/config_generator.py
@@ -13,10 +13,10 @@ SUPPORTED_PROVIDERS_WITH_BASE_URL = [
     "ollama",
     "qwen",
     "amazon_bedrock",
+    "arch",
 ]
 
 SUPPORTED_PROVIDERS_WITHOUT_BASE_URL = [
-    "arch",
     "deepseek",
     "groq",
     "mistral",
@@ -304,6 +304,16 @@ def validate_and_render_schema():
                 }
             )
 
+    # Always add arch-function model provider if not already defined
+    if "arch-function" not in model_provider_name_set:
+        updated_model_providers.append(
+            {
+                "name": "arch-function",
+                "provider_interface": "arch",
+                "model": "Arch-Function",
+            }
+        )
+
     config_yaml["model_providers"] = deepcopy(updated_model_providers)
 
     listeners_with_provider = 0
diff --git a/crates/Cargo.lock b/crates/Cargo.lock
index 0115151e..5797d5a2 100644
--- a/crates/Cargo.lock
+++ b/crates/Cargo.lock
@@ -78,6 +78,43 @@ dependencies = [
  "serde_json",
 ]
 
+[[package]]
+name = "async-openai"
+version = "0.30.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "6bf39a15c8d613eb61892dc9a287c02277639ebead41ee611ad23aaa613f1a82"
+dependencies = [
+ "async-openai-macros",
+ "backoff",
+ "base64 0.22.1",
+ "bytes",
+ "derive_builder",
+ "eventsource-stream",
+ "futures",
+ "rand 0.9.2",
+ "reqwest",
+ "reqwest-eventsource",
+ "secrecy",
+ "serde",
+ "serde_json",
+ "thiserror 2.0.12",
+ "tokio",
+ "tokio-stream",
+ "tokio-util",
+ "tracing",
+]
+
+[[package]]
+name = "async-openai-macros"
+version = "0.1.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "0289cba6d5143bfe8251d57b4a8cac036adf158525a76533a7082ba65ec76398"
+dependencies = [
+ "proc-macro2",
+ "quote",
+ "syn 2.0.101",
+]
+
 [[package]]
 name = "async-trait"
 version = "0.1.88"
@@ -130,6 +167,20 @@ dependencies = [
  "time",
 ]
 
+[[package]]
+name = "backoff"
+version = "0.4.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "b62ddb9cb1ec0a098ad4bbf9344d0713fa193ae1a80af55febcff2627b6a00c1"
+dependencies = [
+ "futures-core",
+ "getrandom 0.2.16",
+ "instant",
+ "pin-project-lite",
+ "rand 0.8.5",
+ "tokio",
+]
+
 [[package]]
 name = "backtrace"
 version = "0.3.75"
@@ -201,7 +252,9 @@ dependencies = [
 name = "brightstaff"
 version = "0.1.0"
 dependencies = [
+ "async-openai",
  "bytes",
+ "chrono",
  "common",
  "eventsource-client",
  "eventsource-stream",
@@ -219,6 +272,7 @@ dependencies = [
  "opentelemetry-stdout",
  "opentelemetry_sdk",
  "pretty_assertions",
+ "rand 0.9.2",
  "reqwest",
  "serde",
  "serde_json",
@@ -231,6 +285,7 @@ dependencies = [
  "tracing",
  "tracing-opentelemetry",
  "tracing-subscriber",
+ "uuid",
 ]
 
 [[package]]
@@ -281,6 +336,12 @@ version = "1.0.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd"
 
+[[package]]
+name = "cfg_aliases"
+version = "0.2.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "613afe47fcd5fac7ccf1db93babcb082c5994d996f20b8b159f2ad1658eb5724"
+
 [[package]]
 name = "chrono"
 version = "0.4.41"
@@ -289,8 +350,10 @@ checksum = "c469d952047f47f91b68d1cba3f10d63c11d73e4636f24f08daf0278abf01c4d"
 dependencies = [
  "android-tzdata",
  "iana-time-zone",
+ "js-sys",
  "num-traits",
  "serde",
+ "wasm-bindgen",
  "windows-link",
 ]
 
@@ -336,6 +399,16 @@ dependencies = [
  "libc",
 ]
 
+[[package]]
+name = "core-foundation"
+version = "0.10.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "b2a6cd9ae233e7f62ba4e9353e81a88df7fc8a5987b8d445b4d90c879bd156f6"
+dependencies = [
+ "core-foundation-sys",
+ "libc",
+]
+
 [[package]]
 name = "core-foundation-sys"
 version = "0.8.7"
@@ -426,6 +499,37 @@ dependencies = [
  "syn 1.0.109",
 ]
 
+[[package]]
+name = "derive_builder"
+version = "0.20.2"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "507dfb09ea8b7fa618fcf76e953f4f5e192547945816d5358edffe39f6f94947"
+dependencies = [
+ "derive_builder_macro",
+]
+
+[[package]]
+name = "derive_builder_core"
+version = "0.20.2"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "2d5bcf7b024d6835cfb3d473887cd966994907effbe9227e8c8219824d06c4e8"
+dependencies = [
+ "darling",
+ "proc-macro2",
+ "quote",
+ "syn 2.0.101",
+]
+
+[[package]]
+name = "derive_builder_macro"
+version = "0.20.2"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "ab63b0e2bf4d5928aff72e83a7dace85d7bba5fe12dcc3c5a572d78caffd3f3c"
+dependencies = [
+ "derive_builder_core",
+ "syn 2.0.101",
+]
+
 [[package]]
 name = "diff"
 version = "0.1.13"
@@ -650,6 +754,12 @@ version = "0.3.31"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "f90f7dce0722e95104fcb095585910c0977252f286e354b5e3bd38902cd99988"
 
+[[package]]
+name = "futures-timer"
+version = "3.0.3"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "f288b0a4f20f9a56b5d1da57e2227c661b7b16168e2f72365f57b63326e29b24"
+
 [[package]]
 name = "futures-util"
 version = "0.3.31"
@@ -685,8 +795,10 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "335ff9f135e4384c8150d6f27c6daed433577f86b4750418338c01a1a2528592"
 dependencies = [
  "cfg-if",
+ "js-sys",
  "libc",
  "wasi 0.11.0+wasi-snapshot-preview1",
+ "wasm-bindgen",
 ]
 
 [[package]]
@@ -696,9 +808,11 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "26145e563e54f2cadc477553f1ec5ee650b00862f0a58bcd12cbdc5f0ea2d2f4"
 dependencies = [
  "cfg-if",
+ "js-sys",
  "libc",
  "r-efi",
  "wasi 0.14.2+wasi-0.2.4",
+ "wasm-bindgen",
 ]
 
 [[package]]
@@ -934,7 +1048,7 @@ dependencies = [
  "hyper 0.14.32",
  "log",
  "rustls 0.21.12",
- "rustls-native-certs",
+ "rustls-native-certs 0.6.3",
  "tokio",
  "tokio-rustls 0.24.1",
 ]
@@ -949,6 +1063,7 @@ dependencies = [
  "hyper 1.6.0",
  "hyper-util",
  "rustls 0.23.27",
+ "rustls-native-certs 0.8.2",
  "rustls-pki-types",
  "tokio",
  "tokio-rustls 0.26.2",
@@ -1181,6 +1296,15 @@ dependencies = [
  "serde",
 ]
 
+[[package]]
+name = "instant"
+version = "0.1.13"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "e0242819d153cba4b4b05a5a8f2a7e9bbf97b6055b2a002b395c96b5ff3c0222"
+dependencies = [
+ "cfg-if",
+]
+
 [[package]]
 name = "ipnet"
 version = "2.11.0"
@@ -1285,6 +1409,12 @@ version = "0.4.27"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "13dc2df351e3202783a1fe0d44375f7295ffb4049267b0f3018346dc122a1d94"
 
+[[package]]
+name = "lru-slab"
+version = "0.1.2"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "112b39cec0b298b6c1999fee3e31427f74f676e4cb9879ed1a121b43661a4154"
+
 [[package]]
 name = "matchers"
 version = "0.1.0"
@@ -1312,6 +1442,16 @@ version = "0.3.17"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "6877bb514081ee2a7ff5ef9de3281f14a4dd4bceac4c09388074a6b5df8a139a"
 
+[[package]]
+name = "mime_guess"
+version = "2.0.5"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "f7c44f8e672c00fe5308fa235f821cb4198414e1c77935c1ab6948d3fd78550e"
+dependencies = [
+ "mime",
+ "unicase",
+]
+
 [[package]]
 name = "minimal-lexical"
 version = "0.2.1"
@@ -1354,7 +1494,7 @@ dependencies = [
  "hyper 1.6.0",
  "hyper-util",
  "log",
- "rand 0.9.1",
+ "rand 0.9.2",
  "regex",
  "serde_json",
  "serde_urlencoded",
@@ -1374,7 +1514,7 @@ dependencies = [
  "openssl-probe",
  "openssl-sys",
  "schannel",
- "security-framework",
+ "security-framework 2.11.1",
  "security-framework-sys",
  "tempfile",
 ]
@@ -1581,7 +1721,7 @@ dependencies = [
  "glob",
  "opentelemetry",
  "percent-encoding",
- "rand 0.9.1",
+ "rand 0.9.2",
  "serde_json",
  "thiserror 2.0.12",
  "tracing",
@@ -1770,6 +1910,61 @@ dependencies = [
  "log",
 ]
 
+[[package]]
+name = "quinn"
+version = "0.11.9"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "b9e20a958963c291dc322d98411f541009df2ced7b5a4f2bd52337638cfccf20"
+dependencies = [
+ "bytes",
+ "cfg_aliases",
+ "pin-project-lite",
+ "quinn-proto",
+ "quinn-udp",
+ "rustc-hash 2.1.1",
+ "rustls 0.23.27",
+ "socket2",
+ "thiserror 2.0.12",
+ "tokio",
+ "tracing",
+ "web-time",
+]
+
+[[package]]
+name = "quinn-proto"
+version = "0.11.13"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "f1906b49b0c3bc04b5fe5d86a77925ae6524a19b816ae38ce1e426255f1d8a31"
+dependencies = [
+ "bytes",
+ "getrandom 0.3.3",
+ "lru-slab",
+ "rand 0.9.2",
+ "ring",
+ "rustc-hash 2.1.1",
+ "rustls 0.23.27",
+ "rustls-pki-types",
+ "slab",
+ "thiserror 2.0.12",
+ "tinyvec",
+ "tracing",
+ "web-time",
+]
+
+[[package]]
+name = "quinn-udp"
+version = "0.5.14"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "addec6a0dcad8a8d96a771f815f0eaf55f9d1805756410b39f5fa81332574cbd"
+dependencies = [
+ "cfg_aliases",
+ "libc",
+ "once_cell",
+ "socket2",
+ "tracing",
+ "windows-sys 0.59.0",
+]
+
 [[package]]
 name = "quote"
 version = "1.0.40"
@@ -1798,9 +1993,9 @@ dependencies = [
 
 [[package]]
 name = "rand"
-version = "0.9.1"
+version = "0.9.2"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "9fbfd9d094a40bf3ae768db9361049ace4c0e04a4fd6b359518bd7b73a73dd97"
+checksum = "6db2770f06117d490610c7488547d543617b21bfa07796d7a12f6f1bd53850d1"
 dependencies = [
  "rand_chacha 0.9.0",
  "rand_core 0.9.3",
@@ -1941,10 +2136,14 @@ dependencies = [
  "js-sys",
  "log",
  "mime",
+ "mime_guess",
  "native-tls",
  "once_cell",
  "percent-encoding",
  "pin-project-lite",
+ "quinn",
+ "rustls 0.23.27",
+ "rustls-native-certs 0.8.2",
  "rustls-pki-types",
  "serde",
  "serde_json",
@@ -1952,6 +2151,7 @@ dependencies = [
  "sync_wrapper",
  "tokio",
  "tokio-native-tls",
+ "tokio-rustls 0.26.2",
  "tokio-util",
  "tower 0.5.2",
  "tower-http",
@@ -1963,6 +2163,22 @@ dependencies = [
  "web-sys",
 ]
 
+[[package]]
+name = "reqwest-eventsource"
+version = "0.6.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "632c55746dbb44275691640e7b40c907c16a2dc1a5842aa98aaec90da6ec6bde"
+dependencies = [
+ "eventsource-stream",
+ "futures-core",
+ "futures-timer",
+ "mime",
+ "nom",
+ "pin-project-lite",
+ "reqwest",
+ "thiserror 1.0.69",
+]
+
 [[package]]
 name = "ring"
 version = "0.17.14"
@@ -1989,6 +2205,12 @@ version = "1.1.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "08d43f7aa6b08d49f382cde6a7982047c3426db949b1424bc4b7ec9ae12c6ce2"
 
+[[package]]
+name = "rustc-hash"
+version = "2.1.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "357703d41365b4b27c590e3ed91eabb1b663f07c4c084095e60cbed4362dff0d"
+
 [[package]]
 name = "rustix"
 version = "1.0.7"
@@ -2021,6 +2243,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "730944ca083c1c233a75c09f199e973ca499344a2b7ba9e755c457e86fb4a321"
 dependencies = [
  "once_cell",
+ "ring",
  "rustls-pki-types",
  "rustls-webpki 0.103.3",
  "subtle",
@@ -2036,7 +2259,19 @@ dependencies = [
  "openssl-probe",
  "rustls-pemfile",
  "schannel",
- "security-framework",
+ "security-framework 2.11.1",
+]
+
+[[package]]
+name = "rustls-native-certs"
+version = "0.8.2"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "9980d917ebb0c0536119ba501e90834767bffc3d60641457fd84a1f3fd337923"
+dependencies = [
+ "openssl-probe",
+ "rustls-pki-types",
+ "schannel",
+ "security-framework 3.5.1",
 ]
 
 [[package]]
@@ -2054,6 +2289,7 @@ version = "1.12.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "229a4a4c221013e7e1f1a043678c5cc39fe5171437c88fb47151a21e6f5b5c79"
 dependencies = [
+ "web-time",
  "zeroize",
 ]
 
@@ -2142,6 +2378,16 @@ version = "3.0.8"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "584e070911c7017da6cb2eb0788d09f43d789029b5877d3e5ecc8acf86ceee21"
 
+[[package]]
+name = "secrecy"
+version = "0.10.3"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "e891af845473308773346dc847b2c23ee78fe442e0472ac50e22a18a93d3ae5a"
+dependencies = [
+ "serde",
+ "zeroize",
+]
+
 [[package]]
 name = "security-framework"
 version = "2.11.1"
@@ -2149,7 +2395,20 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "897b2245f0b511c87893af39b033e5ca9cce68824c4d7e7630b5a1d339658d02"
 dependencies = [
  "bitflags",
- "core-foundation",
+ "core-foundation 0.9.4",
+ "core-foundation-sys",
+ "libc",
+ "security-framework-sys",
+]
+
+[[package]]
+name = "security-framework"
+version = "3.5.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "b3297343eaf830f66ede390ea39da1d462b6b0c1b000f420d0a83f898bbbe6ef"
+dependencies = [
+ "bitflags",
+ "core-foundation 0.10.1",
  "core-foundation-sys",
  "libc",
  "security-framework-sys",
@@ -2420,7 +2679,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "3c879d448e9d986b661742763247d3693ed13609438cf3d006f51f5368a5ba6b"
 dependencies = [
  "bitflags",
- "core-foundation",
+ "core-foundation 0.9.4",
  "system-configuration-sys",
 ]
 
@@ -2509,7 +2768,7 @@ dependencies = [
  "fancy-regex",
  "lazy_static",
  "parking_lot",
- "rustc-hash",
+ "rustc-hash 1.1.0",
 ]
 
 [[package]]
@@ -2553,6 +2812,21 @@ dependencies = [
  "zerovec",
 ]
 
+[[package]]
+name = "tinyvec"
+version = "1.10.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "bfa5fdc3bce6191a1dbc8c02d5c8bffcf557bafa17c124c5264a458f1b0613fa"
+dependencies = [
+ "tinyvec_macros",
+]
+
+[[package]]
+name = "tinyvec_macros"
+version = "0.1.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "1f3ccbac311fea05f86f61904b462b55fb3df8837a366dfc601a0161d0532f20"
+
 [[package]]
 name = "tokio"
 version = "1.45.1"
@@ -2829,6 +3103,12 @@ version = "1.18.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "1dccffe3ce07af9386bfd29e80c0ab1a8205a2fc34e4bcd40364df902cfa8f3f"
 
+[[package]]
+name = "unicase"
+version = "2.8.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "75b844d17643ee918803943289730bec8aac480150456169e647ed0b576ba539"
+
 [[package]]
 name = "unicode-ident"
 version = "1.0.18"
@@ -2870,6 +3150,18 @@ version = "1.0.4"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "b6c140620e7ffbb22c2dee59cafe6084a59b5ffc27a8859a5f0d494b5d52b6be"
 
+[[package]]
+name = "uuid"
+version = "1.18.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "2f87b8aa10b915a06587d0dec516c282ff295b475d94abf425d62b57710070a2"
+dependencies = [
+ "getrandom 0.3.3",
+ "js-sys",
+ "serde",
+ "wasm-bindgen",
+]
+
 [[package]]
 name = "valuable"
 version = "0.1.1"
diff --git a/crates/brightstaff/Cargo.toml b/crates/brightstaff/Cargo.toml
index d424b0e6..3dfd1abe 100644
--- a/crates/brightstaff/Cargo.toml
+++ b/crates/brightstaff/Cargo.toml
@@ -4,7 +4,9 @@ version = "0.1.0"
 edition = "2021"
 
 [dependencies]
+async-openai = "0.30.1"
 bytes = "1.10.1"
+chrono = "0.4"
 common = { version = "0.1.0", path = "../common" }
 eventsource-client = "0.15.0"
 eventsource-stream = "0.2.3"
@@ -21,6 +23,7 @@ opentelemetry-otlp = {version="0.29.0", features=["trace", "tonic", "grpc-tonic"
 opentelemetry-stdout = "0.29.0"
 opentelemetry_sdk = "0.29.0"
 pretty_assertions = "1.4.1"
+rand = "0.9.2"
 reqwest = { version = "0.12.15", features = ["stream"] }
 serde = { version = "1.0.219", features = ["derive"] }
 serde_json = "1.0.140"
@@ -32,6 +35,7 @@ tokio-stream = "0.1"
 time = { version = "0.3", features = ["formatting", "macros"] }
 tracing = "0.1"
 tracing-subscriber = { version = "0.3", features = ["env-filter"] }
+uuid = { version = "1.0", features = ["v4", "serde"] }
 
 [dev-dependencies]
 mockito = "1.0"
diff --git a/crates/brightstaff/src/handlers/function_calling.rs b/crates/brightstaff/src/handlers/function_calling.rs
new file mode 100644
index 00000000..295228b3
--- /dev/null
+++ b/crates/brightstaff/src/handlers/function_calling.rs
@@ -0,0 +1,1934 @@
+use hermesllm::apis::openai::{
+    ChatCompletionsRequest, ChatCompletionsResponse, Choice, FinishReason, FunctionCall, Message,
+    MessageContent, ResponseMessage, Role, Tool, ToolCall, Usage,
+};
+use serde::{Deserialize, Serialize};
+use serde_json::{json, Value};
+use std::collections::HashMap;
+use thiserror::Error;
+use tracing::{info, error};
+use futures::StreamExt;
+use bytes::Bytes;
+use http_body_util::{combinators::BoxBody, BodyExt, Full};
+use hyper::body::Incoming;
+use hyper::{Request, Response, StatusCode};
+use eventsource_stream::Eventsource;
+
+
+
+// ============================================================================
+// CONSTANTS FOR HALLUCINATION DETECTION
+// ============================================================================
+
+const FUNC_NAME_START_PATTERN: &[&str] = &[r#"{"name":""#, r#"{'name':'"#];
+const FUNC_NAME_END_TOKEN: &[&str] = &["\",", "',"];
+const END_TOOL_CALL_TOKEN: &str = "}}";
+
+const FIRST_PARAM_NAME_START_PATTERN: &[&str] = &[r#""arguments":{"#, r#"'arguments':{'"#];
+const PARAMETER_NAME_END_TOKENS: &[&str] = &["\":", ":\"", "':", ":'", "\":\"", "':'"];
+const PARAMETER_NAME_START_PATTERN: &[&str] = &["\",\"", "','"];
+const PARAMETER_VALUE_START_PATTERN: &[&str] = &["\":", "':"];
+const PARAMETER_VALUE_END_TOKEN: &[&str] = &["\",", "\"}"];
+const ARCH_FUNCTION_MODEL_NAME: &str = "Arch-Function";
+
+/// Default hallucination detection thresholds
+#[derive(Debug, Clone)]
+pub struct HallucinationThresholds {
+    pub entropy: f64,
+    pub varentropy: f64,
+    pub probability: f64,
+}
+
+impl Default for HallucinationThresholds {
+    fn default() -> Self {
+        Self {
+            entropy: 0.0001,
+            varentropy: 0.0001,
+            probability: 0.8,
+        }
+    }
+}
+
+// ============================================================================
+// ERROR TYPES
+// ============================================================================
+
+#[derive(Debug, Error)]
+pub enum FunctionCallingError { // error type carried by this module's `Result<T>` alias
+    #[error("Failed to parse JSON: {0}")]
+    JsonParseError(#[from] serde_json::Error), // `#[from]`: `?` converts serde_json errors into this variant
+
+    #[error("Failed to fix malformed JSON: {0}")]
+    JsonFixError(String), // returned by `fix_json_string` when the repaired text still does not parse
+
+    #[error("Invalid model response: {0}")]
+    InvalidModelResponse(String),
+
+    #[error("Tool call verification failed: {0}")]
+    ToolCallVerificationError(String),
+
+    #[error("Data type conversion error: {0}")]
+    DataTypeConversionError(String),
+
+    #[error("Unsupported data type: {0}")]
+    UnsupportedDataType(String),
+
+    #[error("HTTP request error: {0}")]
+    HttpError(#[from] reqwest::Error), // `#[from]`: `?` converts reqwest errors into this variant
+
+    #[error("Invalid tool call: {0}")]
+    InvalidToolCall(String),
+}
+
+pub type Result<T> = std::result::Result<T, FunctionCallingError>;
+
+// ============================================================================
+// CONFIGURATION STRUCTURES
+// ============================================================================
+
+/// Configuration for Arch Function Calling: prompt templates, generation settings, and known type names.
+#[derive(Debug, Clone)]
+pub struct ArchFunctionConfig {
+    pub task_prompt: String, // task instructions; the default text contains a `{tools}` placeholder
+    pub format_prompt: String, // output-format instructions; the default describes three JSON reply shapes
+    pub generation_params: GenerationParams,
+    pub support_data_types: Vec<String>, // type names treated as supported; default mixes Python-style and JSON Schema names
+}
+
+impl Default for ArchFunctionConfig {
+    fn default() -> Self {
+        Self {
+            // Raw string so that \n sequences remain literal in the final prompt
+            task_prompt: r#"You are a helpful assistant designed to assist with the user query by making one or more function calls if needed.\n\nYou are provided with function signatures within <tools></tools> XML tags:\n<tools>\n{tools}\n</tools>\n\nYour task is to decide which functions are needed and collect missing parameters if necessary."#.to_string(),
+            // Use raw string to preserve literal \n sequences instead of real newlines
+            format_prompt: r#"\n\nBased on your analysis, provide your response in one of the following JSON formats:\n1. If no functions are needed:\n```json\n{\"response\": \"Your response text here\"}\n```\n2. If functions are needed but some required parameters are missing:\n```json\n{\"required_functions\": [\"func_name1\", \"func_name2\", ...], \"clarification\": \"Text asking for missing parameters\"}\n```\n3. If functions are needed and all required parameters are available:\n```json\n{\"tool_calls\": [{\"name\": \"func_name1\", \"arguments\": {\"argument1\": \"value1\", \"argument2\": \"value2\"}},... (more tool calls as required)]}\n```"#.to_string(),
+            generation_params: GenerationParams::default(),
+            support_data_types: vec![
+                "int".to_string(),
+                "float".to_string(),
+                "bool".to_string(),
+                "str".to_string(),
+                "list".to_string(),
+                "tuple".to_string(),
+                "set".to_string(),
+                "dict".to_string(),
+                // JSON Schema names (standard)
+                "integer".to_string(),
+                "number".to_string(),
+                "boolean".to_string(),
+                "string".to_string(),
+                "array".to_string(),
+                "object".to_string(),
+            ],
+        }
+    }
+}
+
+/// Arch Agent settings: reuses the prompts and supported data types of
+/// `ArchFunctionConfig::default()` and pairs them with its own generation parameters.
+#[derive(Debug, Clone)]
+pub struct ArchAgentConfig {
+    pub task_prompt: String,
+    pub format_prompt: String,
+    pub generation_params: GenerationParams,
+    pub support_data_types: Vec<String>,
+}
+
+impl Default for ArchAgentConfig {
+    fn default() -> Self {
+        // Take the prompts and type list from the function-calling defaults; its generation params are discarded.
+        let ArchFunctionConfig { task_prompt, format_prompt, support_data_types, .. } =
+            ArchFunctionConfig::default();
+        // Written out in full so the agent's settings do not silently follow GenerationParams::default().
+        let generation_params = GenerationParams {
+            temperature: 0.01,
+            top_p: 1.0,
+            top_k: 10,
+            max_tokens: 1024,
+            stop_token_ids: vec![151645],
+            logprobs: Some(true),
+            top_logprobs: Some(10),
+        };
+        Self { task_prompt, format_prompt, generation_params, support_data_types }
+    }
+}
+
+/// Generation parameters for LLM
+#[derive(Debug, Clone, Serialize, Deserialize)]
+pub struct GenerationParams {
+    pub temperature: f32,
+    pub top_p: f32,
+    pub top_k: u32,
+    pub max_tokens: u32,
+    pub stop_token_ids: Vec<u32>,
+    pub logprobs: Option<bool>,
+    pub top_logprobs: Option<u32>,
+}
+
+impl Default for GenerationParams {
+    fn default() -> Self {
+        Self {
+            temperature: 0.1,
+            top_p: 1.0,
+            top_k: 10,
+            max_tokens: 1024,
+            stop_token_ids: vec![151645],
+            logprobs: Some(true),
+            top_logprobs: Some(10),
+        }
+    }
+}
+
+// ============================================================================
+// PARSED MODEL RESPONSE
+// ============================================================================
+
+/// Result filled in by `parse_model_response`; the derived `Default` starts empty with `is_valid == false`.
+#[derive(Debug, Clone, Serialize, Deserialize, Default)]
+pub struct ParsedModelResponse {
+    pub raw_response: String, // the repaired JSON wrapped again in a ```json fenced block
+    pub response: Option<String>, // top-level "response" value, set when it is a JSON string
+    pub required_functions: Vec<String>, // NOTE(review): presumably from the "required_functions" key — confirm
+    pub clarification: String, // NOTE(review): presumably from the "clarification" key — confirm
+    pub tool_calls: Vec<ToolCall>, // NOTE(review): presumably built from the "tool_calls" key — confirm
+    pub is_valid: bool, // false when JSON repair fails, true once the content parses (later checks may revise it — confirm)
+    pub error_message: String, // describes the failure when JSON repair fails
+}
+
+// ============================================================================
+// TOOL CALL VERIFICATION RESULT
+// ============================================================================
+
+/// Result of tool call verification; the `Default` value represents a passing check.
+#[derive(Debug, Clone)]
+pub struct ToolCallVerification {
+    pub is_valid: bool, // true by default
+    pub invalid_tool_call: Option<ToolCall>, // NOTE(review): presumably the call that failed verification — confirm
+    pub error_message: String, // empty by default
+}
+
+impl Default for ToolCallVerification {
+    fn default() -> Self {
+        Self {
+            is_valid: true, // unlike a derived Default, this starts as valid
+            invalid_tool_call: None,
+            error_message: String::new(),
+        }
+    }
+}
+
+/// Main handler for Arch Function Calling: model name, prompt configuration, and the HTTP client built in `new`.
+pub struct ArchFunctionHandler {
+    pub model_name: String, // also used by `new` as the value of the Arch provider hint header
+    pub config: ArchFunctionConfig,
+    pub default_prefix: String, // set by `new`: a ```json fence opener plus `{\"`, with `\n` and `\"` kept as literal backslash sequences
+    pub clarify_prefix: String, // set by `new`: the same fence opener plus `{\"required_functions\":` (literal backslashes)
+    pub endpoint_url: String, // NOTE(review): presumably the URL requests are sent to — confirm against callers
+    pub http_client: reqwest::Client, // carries the provider hint as a default header
+}
+
+impl ArchFunctionHandler {
+    /// Creates a handler whose HTTP client sends `model_name` as the Arch provider hint header.
+    /// Panics if `model_name` is not a valid HTTP header value or if the client cannot be built.
+    pub fn new(model_name: String, config: ArchFunctionConfig, endpoint_url: String) -> Self {
+        use common::consts::ARCH_PROVIDER_HINT_HEADER;
+        use reqwest::header;
+        // The hint is installed as a default header so it accompanies requests made with this client.
+        let mut headers = header::HeaderMap::new();
+        headers.insert(
+            header::HeaderName::from_static(ARCH_PROVIDER_HINT_HEADER),
+            header::HeaderValue::from_str(&model_name)
+                .expect("model_name must be a valid HTTP header value"),
+        );
+        let http_client = reqwest::ClientBuilder::new()
+            .default_headers(headers)
+            .build()
+            .expect("Failed to create HTTP client");
+
+        Self {
+            model_name,
+            config,
+            default_prefix: r#"```json\n{\""#.to_string(),
+            clarify_prefix: r#"```json\n{\"required_functions\":"#.to_string(),
+            endpoint_url,
+            http_client,
+        }
+    }
+
+    /// Renders each tool's `function` definition as a JSON string and joins them with a literal `\n`
+    /// (a backslash followed by `n`, not a newline). Fails on the first definition that cannot be serialized.
+    pub fn convert_tools(&self, tools: &[Tool]) -> Result<String> {
+        let mut rendered = Vec::with_capacity(tools.len());
+        for tool in tools {
+            // `?` lifts serde_json::Error into FunctionCallingError through its `From` impl
+            let definition = serde_json::to_string(&tool.function)?;
+            rendered.push(definition);
+        }
+        Ok(rendered.join("\\n"))
+    }
+
+    /// Repairs malformed JSON by balancing brackets and returns it re-serialized in compact form.
+    /// Brackets inside quoted strings are left alone (so `"ok :)"` keeps its text); if that repair does
+    /// not parse, the older string-blind repair is tried. Each is retried with `'` replaced by `"`.
+    /// Returns `JsonFixError` carrying the last parse error when no candidate is valid JSON.
+    pub fn fix_json_string(&self, json_str: &str) -> Result<String> {
+        // Drops stray closers and appends missing ones; quoted text is copied verbatim when asked.
+        fn balance(input: &str, respect_strings: bool) -> String {
+            let mut stack: Vec<char> = Vec::new();
+            let mut out = String::with_capacity(input.len() + 4);
+            let mut quote: Option<char> = None; // delimiter of the string being copied, if any
+            let mut escaped = false; // previous char was an unescaped backslash inside that string
+            for ch in input.chars() {
+                if let Some(delim) = quote {
+                    out.push(ch);
+                    if escaped || ch == '\\' {
+                        escaped = !escaped; // a backslash escapes exactly the next character
+                    } else if ch == delim {
+                        quote = None;
+                    }
+                    continue;
+                }
+                match ch {
+                    '"' | '\'' if respect_strings => {
+                        quote = Some(ch);
+                        out.push(ch);
+                    }
+                    '{' | '[' | '(' => {
+                        stack.push(ch);
+                        out.push(ch);
+                    }
+                    '}' | ']' | ')' => {
+                        let opener = match ch { '}' => '{', ']' => '[', _ => '(' };
+                        if stack.last() == Some(&opener) { // stray or mismatched closers are dropped
+                            stack.pop();
+                            out.push(ch);
+                        }
+                    }
+                    _ => out.push(ch),
+                }
+            }
+            while let Some(open) = stack.pop() {
+                out.push(match open { '{' => '}', '[' => ']', _ => ')' }); // close innermost first
+            }
+            out
+        }
+        let trimmed = json_str.trim();
+        let mut last_err = String::new();
+        for candidate in [balance(trimmed, true), balance(trimmed, false)] {
+            for attempt in [candidate.clone(), candidate.replace('\'', "\"")] {
+                match serde_json::from_str::<Value>(&attempt) {
+                    Ok(val) => return serde_json::to_string(&val).map_err(FunctionCallingError::from),
+                    Err(e) => last_err = e.to_string(),
+                }
+            }
+        }
+        Err(FunctionCallingError::JsonFixError(format!("Failed to fix JSON: {}", last_err)))
+    }
+
+    /// Turns raw model output into a `ParsedModelResponse`.
+    ///
+    /// The text is trimmed, an enclosing markdown code fence (with an optional
+    /// `json` tag) is stripped, and the remainder is repaired through
+    /// `fix_json_string` before being decoded as JSON. On success `is_valid`
+    /// is set and the `response`, `required_functions`, `clarification` and
+    /// `tool_calls` fields are copied over when present with the expected
+    /// JSON type. On failure `is_valid` is cleared and `error_message`
+    /// describes the step that failed.
+    pub fn parse_model_response(&self, content: &str) -> ParsedModelResponse {
+        let mut parsed = ParsedModelResponse::default();
+
+        // Strip a surrounding markdown code fence, if any.
+        let trimmed = content.trim();
+        let payload = if trimmed.starts_with("```") && trimmed.ends_with("```") {
+            trimmed
+                .trim_start_matches("```")
+                .trim_end_matches("```")
+                // No-op unless the fence carried a `json` language tag.
+                .trim_start_matches("json")
+                // Raw string: removes literal backslash-n sequences, then real whitespace.
+                .trim_start_matches(r"\n")
+                .trim_end_matches(r"\n")
+                .trim()
+                // Unescape the quotes: \" -> "
+                // The model sometimes returns escaped JSON inside markdown blocks
+                .replace(r#"\""#, "\"")
+        } else {
+            trimmed.to_string()
+        };
+
+        // Repair the JSON text; give up early when that is not possible.
+        let repaired = match self.fix_json_string(&payload) {
+            Ok(fixed) => fixed,
+            Err(e) => {
+                parsed.is_valid = false;
+                parsed.error_message = format!("Failed to fix JSON: {}", e);
+                return parsed;
+            }
+        };
+        parsed.raw_response = format!("```json\n{}\n```", repaired);
+
+        // Decode the repaired text.
+        let model_response = match serde_json::from_str::<Value>(&repaired) {
+            Ok(value) => value,
+            Err(e) => {
+                parsed.is_valid = false;
+                parsed.error_message = format!("Failed to parse model response: {}", e);
+                return parsed;
+            }
+        };
+        parsed.is_valid = true;
+
+        if let Some(text) = model_response.get("response").and_then(Value::as_str) {
+            parsed.response = Some(text.to_string());
+        }
+
+        // Non-string entries of `required_functions` are dropped.
+        if let Some(names) = model_response
+            .get("required_functions")
+            .and_then(Value::as_array)
+        {
+            parsed.required_functions = names
+                .iter()
+                .filter_map(|entry| entry.as_str().map(String::from))
+                .collect();
+        }
+
+        if let Some(text) = model_response
+            .get("clarification")
+            .and_then(Value::as_str)
+        {
+            parsed.clarification = text.to_string();
+        }
+
+        if let Some(calls) = model_response.get("tool_calls").and_then(Value::as_array) {
+            for call in calls {
+                // A missing or non-string name becomes the empty string.
+                let name = call
+                    .get("name")
+                    .and_then(Value::as_str)
+                    .unwrap_or("")
+                    .to_string();
+
+                // Arguments are re-serialized to a JSON string; absent arguments yield "".
+                let arguments = call
+                    .get("arguments")
+                    .map(|args| serde_json::to_string(args).unwrap_or_default())
+                    .unwrap_or_default();
+
+                parsed.tool_calls.push(ToolCall {
+                    id: format!("call_{}", rand::random::<u32>() % 10000 + 1000),
+                    call_type: "function".to_string(),
+                    function: FunctionCall { name, arguments },
+                });
+            }
+        }
+
+        parsed
+    }
+
+    /// Attempts a lenient conversion of `value` to `target_type`.
+    ///
+    /// * "float" / "number": a value readable as `i64` becomes an `f64` number.
+    /// * "list" / "array": a string holding a JSON array is decoded to that array.
+    /// * "str" / "string": any non-string value is replaced by its
+    ///   `to_string()` rendering.
+    ///
+    /// In every other case a clone of `value` is returned unchanged. The
+    /// visible code never produces an `Err`.
+    pub fn convert_data_type(&self, value: &Value, target_type: &str) -> Result<Value> {
+        let converted: Option<Value> = match target_type {
+            "float" | "number" => value.as_i64().map(|int_val| json!(int_val as f64)),
+            "list" | "array" => value
+                .as_str()
+                .and_then(|text| serde_json::from_str::<Vec<Value>>(text).ok())
+                .map(|items| json!(items)),
+            "str" | "string" if !value.is_string() => Some(json!(value.to_string())),
+            _ => None,
+        };
+
+        // Fall back to the untouched input when no conversion applied.
+        Ok(converted.unwrap_or_else(|| value.clone()))
+    }
+
+    /// Reports whether `value` already has the JSON type named by `target_type`.
+    ///
+    /// Integer values also satisfy "float" / "number". Type names not listed
+    /// here are accepted unconditionally.
+    fn check_value_type(&self, value: &Value, target_type: &str) -> bool {
+        let is_integer = value.is_i64() || value.is_u64();
+        match target_type {
+            "int" | "integer" => is_integer,
+            "float" | "number" => value.is_f64() || is_integer,
+            "bool" | "boolean" => value.is_boolean(),
+            "str" | "string" => value.is_string(),
+            "list" | "array" => value.is_array(),
+            "dict" | "object" => value.is_object(),
+            _ => true,
+        }
+    }
+
+    /// Decides whether `param_value` is acceptable for `target_type`.
+    ///
+    /// Returns `Ok(true)` when the value already has the target type or has it
+    /// after `convert_data_type`, and `Ok(false)` when it still does not match.
+    /// An error from `convert_data_type` is passed through.
+    fn validate_or_convert_parameter(
+        &self,
+        param_value: &Value,
+        target_type: &str,
+    ) -> Result<bool> {
+        if self.check_value_type(param_value, target_type) {
+            // Already the right type; no conversion needed.
+            Ok(true)
+        } else {
+            // Convert, then re-check the converted value.
+            let candidate = self.convert_data_type(param_value, target_type)?;
+            Ok(self.check_value_type(&candidate, target_type))
+        }
+    }
+
+    /// Verifies the extracted tool calls against the declared tools.
+    ///
+    /// Tool calls are checked in order and verification stops at the first
+    /// failing call. For that call `is_valid` is cleared, `invalid_tool_call`
+    /// holds a clone of it and `error_message` describes the FIRST problem
+    /// found (a later check can no longer overwrite an earlier error).
+    ///
+    /// Checks performed per call, in this order:
+    /// 1. the arguments string parses as a JSON object,
+    /// 2. the function name matches one of `tools`,
+    /// 3. every name in the schema's `required` array is present,
+    /// 4. every supplied argument is declared under `properties`, its declared
+    ///    `type` is listed in `config.support_data_types`, and its value has
+    ///    (or converts to) that type.
+    pub fn verify_tool_calls(
+        &self,
+        tools: &[Tool],
+        tool_calls: &[ToolCall],
+    ) -> ToolCallVerification {
+        let mut verification = ToolCallVerification::default();
+
+        // Mirrors the original loop guard: nothing is checked unless the
+        // default verification starts out valid.
+        if !verification.is_valid {
+            return verification;
+        }
+
+        // Function name -> parameter schema. A later duplicate name wins.
+        let functions: HashMap<&str, &Value> = tools
+            .iter()
+            .map(|tool| (tool.function.name.as_str(), &tool.function.parameters))
+            .collect();
+
+        let first_failure = tool_calls.iter().find_map(|tool_call| {
+            self.check_single_tool_call(&functions, tool_call)
+                .err()
+                .map(|message| (tool_call, message))
+        });
+
+        if let Some((tool_call, message)) = first_failure {
+            verification.is_valid = false;
+            verification.invalid_tool_call = Some(tool_call.clone());
+            verification.error_message = message;
+        }
+
+        verification
+    }
+
+    /// Checks one tool call against the known function schemas.
+    ///
+    /// Returns `Err` with the user-facing message for the first problem found.
+    fn check_single_tool_call(
+        &self,
+        functions: &HashMap<&str, &Value>,
+        tool_call: &ToolCall,
+    ) -> std::result::Result<(), String> {
+        let func_name = &tool_call.function.name;
+
+        // Parse arguments as JSON
+        let func_args: HashMap<String, Value> =
+            serde_json::from_str(&tool_call.function.arguments).map_err(|e| {
+                format!(
+                    "Failed to parse arguments for function '{}': {}",
+                    func_name, e
+                )
+            })?;
+
+        // Check if function is available
+        let function_params = functions
+            .get(func_name.as_str())
+            .ok_or_else(|| format!("{} is not available!", func_name))?;
+
+        // Check if all required parameters are present. Non-string entries in
+        // `required` are ignored.
+        let missing_required = function_params
+            .get("required")
+            .and_then(Value::as_array)
+            .into_iter()
+            .flatten()
+            .filter_map(Value::as_str)
+            .find(|name| !func_args.contains_key(*name));
+        if let Some(param_name) = missing_required {
+            return Err(format!(
+                "`{}` is required by the function `{}` but not found in the tool call!",
+                param_name, func_name
+            ));
+        }
+
+        // Verify the data type of each parameter
+        if let Some(properties) = function_params
+            .get("properties")
+            .and_then(Value::as_object)
+        {
+            for (param_name, param_value) in &func_args {
+                let param_schema = properties.get(param_name).ok_or_else(|| {
+                    format!(
+                        "Parameter `{}` is not defined in the function `{}`.",
+                        param_name, func_name
+                    )
+                })?;
+
+                // Parameters without a string `type` in their schema are not type-checked.
+                let target_type = match param_schema.get("type").and_then(Value::as_str) {
+                    Some(declared) => declared,
+                    None => continue,
+                };
+
+                if !self
+                    .config
+                    .support_data_types
+                    .contains(&target_type.to_string())
+                {
+                    return Err(format!("Data type `{}` is not supported.", target_type));
+                }
+
+                // A conversion error is reported the same way as a type mismatch.
+                let type_ok = self
+                    .validate_or_convert_parameter(param_value, target_type)
+                    .unwrap_or(false);
+                if !type_ok {
+                    return Err(format!(
+                        "Parameter `{}` is expected to have the data type `{}`, got incompatible type.",
+                        param_name, target_type
+                    ));
+                }
+            }
+        }
+
+        Ok(())
+    }
+
+    /// Builds the system prompt for the given tools.
+    ///
+    /// The output of `convert_tools` replaces the `{tools}` placeholder in
+    /// `config.task_prompt`, and `config.format_prompt` is appended to the
+    /// result. An error from `convert_tools` is passed through.
+    pub fn format_system_prompt(&self, tools: &[Tool]) -> Result<String> {
+        let rendered_tools = self.convert_tools(tools)?;
+
+        let mut prompt = self.config.task_prompt.replace("{tools}", &rendered_tools);
+        prompt.push_str(&self.config.format_prompt);
+
+        Ok(prompt)
+    }
+
+    /// Builds the message list sent to the model from an incoming conversation.
+    ///
+    /// * When `tools` is provided, a system message produced by
+    ///   `format_system_prompt` is placed first.
+    /// * A message carrying at least one tool call becomes an assistant
+    ///   message whose text is the first call's function serialized as JSON
+    ///   inside `<tool_call>` tags. Only the first call is used.
+    /// * A `Role::Tool` message (with no tool calls, including an empty list)
+    ///   becomes a user message. Its text is replaced by an empty
+    ///   `<tool_response>` block when the `optimize_context_window` metadata
+    ///   entry is the string "true" (case-insensitive). Otherwise, when the
+    ///   previous message's text is JSON with a non-empty `tool_calls` array,
+    ///   the text is wrapped as `{"name": ..., "result": ...}` inside
+    ///   `<tool_response>` tags; if not, the text is left as is.
+    /// * `MessageContent::Parts` content is replaced by an empty string.
+    /// * `extra_instruction`, if given, is appended to the last message on a
+    ///   new line.
+    /// * The result is passed through `truncate_messages` with `max_tokens`.
+    ///
+    /// Returns `FunctionCallingError::InvalidModelResponse` when the last
+    /// processed message is not from the user. Errors from
+    /// `format_system_prompt` and JSON serialization are passed through.
+    pub fn process_messages(
+        &self,
+        messages: &[Message],
+        tools: Option<&[Tool]>,
+        extra_instruction: Option<&str>,
+        max_tokens: usize,
+        metadata: Option<&HashMap<String, Value>>,
+    ) -> Result<Vec<Message>> {
+        let mut processed_messages: Vec<Message> = Vec::with_capacity(messages.len() + 1);
+
+        // Add system message with tools if provided
+        if let Some(tools) = tools {
+            let system_prompt = self.format_system_prompt(tools)?;
+            processed_messages.push(Message {
+                role: Role::System,
+                content: MessageContent::Text(system_prompt),
+                name: None,
+                tool_calls: None,
+                tool_call_id: None,
+            });
+        }
+
+        // The flag does not depend on the message, so read it once.
+        let optimize_context = metadata
+            .and_then(|m| m.get("optimize_context_window"))
+            .and_then(|v| v.as_str())
+            .map(|s| s.to_lowercase() == "true")
+            .unwrap_or(false);
+
+        for (idx, message) in messages.iter().enumerate() {
+            let mut role = message.role.clone();
+            let mut content = match &message.content {
+                MessageContent::Text(text) => text.clone(),
+                MessageContent::Parts(_) => String::new(),
+            };
+
+            // `None` for both a missing and an empty tool-call list, so a tool
+            // message with `tool_calls: []` still reaches the tool branch below.
+            let first_tool_call = message
+                .tool_calls
+                .as_ref()
+                .and_then(|calls| calls.first());
+
+            if let Some(tool_call) = first_tool_call {
+                role = Role::Assistant;
+                let tool_call_json = serde_json::to_string(&tool_call.function)?;
+                content = format!("<tool_call>\n{}\n</tool_call>", tool_call_json);
+            } else if role == Role::Tool {
+                role = Role::User;
+
+                if optimize_context {
+                    content = "<tool_response>\n\n</tool_response>".to_string();
+                } else if let Some(func_name) = idx
+                    .checked_sub(1)
+                    .and_then(|prev| Self::first_tool_call_name_in(&messages[prev]))
+                {
+                    // Pair the tool output with the function named in the previous message.
+                    let tool_response = json!({
+                        "name": func_name,
+                        "result": content,
+                    });
+                    content = format!(
+                        "<tool_response>\n{}\n</tool_response>",
+                        serde_json::to_string(&tool_response)?
+                    );
+                }
+            }
+
+            processed_messages.push(Message {
+                role,
+                content: MessageContent::Text(content),
+                name: message.name.clone(),
+                tool_calls: None,
+                tool_call_id: None,
+            });
+        }
+
+        // Ensure last message is from user (an empty list passes this check)
+        if processed_messages
+            .last()
+            .map_or(false, |last| last.role != Role::User)
+        {
+            return Err(FunctionCallingError::InvalidModelResponse(
+                "Last message must be from user".to_string(),
+            ));
+        }
+
+        // Add extra instruction if provided
+        if let Some(instruction) = extra_instruction {
+            if let Some(MessageContent::Text(content)) =
+                processed_messages.last_mut().map(|last| &mut last.content)
+            {
+                content.push('\n');
+                content.push_str(instruction);
+            }
+        }
+
+        // Truncate messages if they exceed max_tokens
+        Ok(self.truncate_messages(processed_messages, max_tokens))
+    }
+
+    /// Extracts the name of the first entry of the `tool_calls` array found in
+    /// the text of `previous`.
+    ///
+    /// A surrounding markdown code fence (with optional `json` tag) is removed
+    /// before parsing. Returns `None` when the content is not text, is not
+    /// valid JSON, or has no non-empty `tool_calls` array. Yields "no_name"
+    /// when the first entry has no string `name`.
+    fn first_tool_call_name_in(previous: &Message) -> Option<String> {
+        let text = match &previous.content {
+            MessageContent::Text(text) => text.as_str(),
+            MessageContent::Parts(_) => return None,
+        };
+
+        // Strip markdown code blocks
+        let mut candidate = text;
+        if candidate.starts_with("```") && candidate.ends_with("```") {
+            candidate = candidate
+                .trim_start_matches("```")
+                .trim_end_matches("```")
+                .trim()
+                .trim_start_matches("json")
+                .trim();
+        }
+
+        let parsed = serde_json::from_str::<Value>(candidate).ok()?;
+        let first_call = parsed.get("tool_calls")?.as_array()?.first()?;
+
+        Some(
+            first_call
+                .get("name")
+                .and_then(Value::as_str)
+                .unwrap_or("no_name")
+                .to_string(),
+        )
+    }
+
+    /// Drops the oldest conversation messages so the rest roughly fits `max_tokens`.
+    ///
+    /// Token usage is estimated as text byte length divided by four. A leading
+    /// system message is always kept and counted first. The remaining messages
+    /// are scanned from newest to oldest. Once the running estimate reaches
+    /// `max_tokens`, the scan continues until it meets a user message, which
+    /// becomes the first retained message. If no such user message is found,
+    /// retention starts at the last position that was still under the limit.
+    fn truncate_messages(&self, messages: Vec<Message>, max_tokens: usize) -> Vec<Message> {
+        let mut estimated_tokens: usize = 0;
+
+        // Keep system message if present
+        let has_system = messages
+            .first()
+            .map_or(false, |first| first.role == Role::System);
+        if has_system {
+            if let MessageContent::Text(text) = &messages[0].content {
+                estimated_tokens += text.len() / 4; // Approximate 4 chars per token
+            }
+        }
+        let conversation_start = if has_system { 1 } else { 0 };
+
+        // Index of the first conversation message to keep. Starts past the end
+        // and moves backwards while the budget allows.
+        let mut keep_from = messages.len();
+        for (i, message) in messages.iter().enumerate().skip(conversation_start).rev() {
+            if let MessageContent::Text(text) = &message.content {
+                estimated_tokens += text.len() / 4;
+                // Over budget: cut at the first user message reached from here.
+                if estimated_tokens >= max_tokens && message.role == Role::User {
+                    keep_from = i;
+                    break;
+                }
+            }
+            // Still under budget: this message is retained.
+            if estimated_tokens < max_tokens {
+                keep_from = i;
+            }
+        }
+
+        // Return system message + truncated conversation
+        let mut kept = Vec::new();
+        if has_system {
+            kept.push(messages[0].clone());
+        }
+        kept.extend_from_slice(&messages[keep_from..]);
+
+        kept
+    }
+
+    /// Appends an assistant message containing `prefill` to `messages` and
+    /// returns the extended list.
+    pub fn prefill_message(&self, mut messages: Vec<Message>, prefill: &str) -> Vec<Message> {
+        let prefill_entry = Message {
+            role: Role::Assistant,
+            content: MessageContent::Text(prefill.to_string()),
+            name: None,
+            tool_calls: None,
+            tool_call_id: None,
+        };
+        messages.push(prefill_entry);
+        messages
+    }
+
+    /// Assembles a `ChatCompletionsRequest` for `messages` from the configured
+    /// generation parameters, including the VLLM-specific fields.
+    ///
+    /// `continue_final_message` is always `true` and `add_generation_prompt`
+    /// always `false`. `stop_token_ids` is only set when the configured list
+    /// is non-empty. Fields not listed take their `Default` value.
+    fn create_request_with_extra_body(&self, messages: Vec<Message>, stream: bool) -> ChatCompletionsRequest {
+        let params = &self.config.generation_params;
+
+        // An empty stop-token list is sent as `None`.
+        let stop_token_ids = (!params.stop_token_ids.is_empty())
+            .then(|| params.stop_token_ids.clone());
+
+        ChatCompletionsRequest {
+            model: self.model_name.clone(),
+            messages,
+            temperature: Some(params.temperature),
+            top_p: Some(params.top_p),
+            max_tokens: Some(params.max_tokens),
+            stream: Some(stream),
+            logprobs: params.logprobs,
+            top_logprobs: params.top_logprobs,
+            // VLLM-specific parameters
+            continue_final_message: Some(true),
+            add_generation_prompt: Some(false),
+            top_k: Some(params.top_k),
+            stop_token_ids,
+            ..Default::default()
+        }
+    }
+
+    /// Posts `request` as JSON to `endpoint_url` and returns the reply as a
+    /// stream of decoded SSE events.
+    ///
+    /// Each stream item is the event's data parsed as JSON, or an error string
+    /// for an SSE failure or unparsable data. The `[DONE]` sentinel is dropped.
+    ///
+    /// Fails with `InvalidModelResponse` when the request cannot be serialized
+    /// or the HTTP status is not a success, and with `HttpError` when sending
+    /// fails.
+    async fn make_streaming_request(&self, request: ChatCompletionsRequest) -> Result<std::pin::Pin<Box<dyn futures::Stream<Item = std::result::Result<Value, String>> + Send>>> {
+        let payload = match serde_json::to_string(&request) {
+            Ok(body) => body,
+            Err(e) => {
+                return Err(FunctionCallingError::InvalidModelResponse(format!(
+                    "Failed to serialize request: {}",
+                    e
+                )))
+            }
+        };
+
+        let http_response = self
+            .http_client
+            .post(&self.endpoint_url)
+            .header("Content-Type", "application/json")
+            .body(payload)
+            .send()
+            .await
+            .map_err(FunctionCallingError::HttpError)?;
+
+        let status = http_response.status();
+        if !status.is_success() {
+            // Use the response body as the error detail when it can be read.
+            let error_text = http_response
+                .text()
+                .await
+                .unwrap_or_else(|_| "Unknown error".to_string());
+            return Err(FunctionCallingError::InvalidModelResponse(format!(
+                "HTTP error {}: {}",
+                status, error_text
+            )));
+        }
+
+        // Parse SSE stream
+        let events = http_response.bytes_stream().eventsource();
+        let json_chunks = events.filter_map(|event_result| async move {
+            let event = match event_result {
+                Ok(event) => event,
+                Err(e) => return Some(Err(format!("SSE stream error: {}", e))),
+            };
+
+            // Skip [DONE] sentinel
+            if event.data == "[DONE]" {
+                return None;
+            }
+
+            Some(
+                serde_json::from_str::<Value>(&event.data)
+                    .map_err(|e| format!("JSON parse error: {}", e)),
+            )
+        });
+
+        Ok(Box::pin(json_chunks))
+    }
+
+    /// Posts `request` as JSON to `endpoint_url` and decodes the whole reply
+    /// body as a `ChatCompletionsResponse`.
+    ///
+    /// Fails with `InvalidModelResponse` when the request cannot be serialized
+    /// or the HTTP status is not a success, with `HttpError` when sending or
+    /// reading the body fails, and with `JsonParseError` when the body is not
+    /// a valid response document.
+    async fn make_non_streaming_request(&self, request: ChatCompletionsRequest) -> Result<ChatCompletionsResponse> {
+        let payload = match serde_json::to_string(&request) {
+            Ok(body) => body,
+            Err(e) => {
+                return Err(FunctionCallingError::InvalidModelResponse(format!(
+                    "Failed to serialize request: {}",
+                    e
+                )))
+            }
+        };
+
+        let http_response = self
+            .http_client
+            .post(&self.endpoint_url)
+            .header("Content-Type", "application/json")
+            .body(payload)
+            .send()
+            .await
+            .map_err(FunctionCallingError::HttpError)?;
+
+        let status = http_response.status();
+        if !status.is_success() {
+            // Use the response body as the error detail when it can be read.
+            let error_text = http_response
+                .text()
+                .await
+                .unwrap_or_else(|_| "Unknown error".to_string());
+            return Err(FunctionCallingError::InvalidModelResponse(format!(
+                "HTTP error {}: {}",
+                status, error_text
+            )));
+        }
+
+        let body_text = http_response
+            .text()
+            .await
+            .map_err(FunctionCallingError::HttpError)?;
+
+        serde_json::from_str(&body_text).map_err(FunctionCallingError::JsonParseError)
+    }
+
+    pub async fn function_calling_chat(
+        &self,
+        request: ChatCompletionsRequest,
+    ) -> Result<ChatCompletionsResponse> {
+        use tracing::{info, error};
+
+        info!("[Arch-Function] - ChatCompletion");
+
+        let messages = self.process_messages(
+            &request.messages,
+            request.tools.as_deref(),
+            None,
+            self.config.generation_params.max_tokens as usize,
+            request.metadata.as_ref(),
+        )?;
+
+        info!("[request to arch-fc]: model: {}, messages count: {}",
+            self.model_name, messages.len());
+
+        let use_agent_orchestrator = request.metadata
+            .as_ref()
+            .and_then(|m| m.get("use_agent_orchestrator"))
+            .and_then(|v| v.as_bool())
+            .unwrap_or(false);
+
+        let prefilled_messages = self.prefill_message(messages.clone(), &self.default_prefix);
+
+        // Create request with extra_body parameters
+        let stream_request = self.create_request_with_extra_body(prefilled_messages.clone(), true);
+        let mut stream = self.make_streaming_request(stream_request).await?;
+
+        let mut model_response = String::new();
+
+        if use_agent_orchestrator {
+            while let Some(chunk_result) = stream.next().await {
+                let chunk = chunk_result.map_err(|e| FunctionCallingError::InvalidModelResponse(e))?;
+                // Extract content from JSON response
+                if let Some(choices) = chunk.get("choices").and_then(|v| v.as_array()) {
+                    if let Some(choice) = choices.first() {
+                        if let Some(content) = choice.get("delta")
+                            .and_then(|d| d.get("content"))
+                            .and_then(|c| c.as_str()) {
+                            model_response.push_str(content);
+                        }
+                    }
+                }
+            }
+            info!("[Agent Orchestrator]: response received");
+        } else {
+            if let Some(tools) = request.tools.as_ref() {
+                let mut hallucination_state = HallucinationState::new(tools);
+                let mut has_tool_calls = None;
+                let mut has_hallucination = false;
+
+                while let Some(chunk_result) = stream.next().await {
+                    let chunk = chunk_result.map_err(|e| FunctionCallingError::InvalidModelResponse(e))?;
+
+                    // Extract content and logprobs from JSON response
+                    if let Some(choices) = chunk.get("choices").and_then(|v| v.as_array()) {
+                        if let Some(choice) = choices.first() {
+                            if let Some(content) = choice.get("delta")
+                                .and_then(|d| d.get("content"))
+                                .and_then(|c| c.as_str()) {
+
+                                // Extract logprobs
+                                let logprobs: Vec<f64> = choice.get("logprobs")
+                                    .and_then(|lp| lp.get("content"))
+                                    .and_then(|c| c.as_array())
+                                    .and_then(|arr| arr.first())
+                                    .and_then(|token| token.get("top_logprobs"))
+                                    .and_then(|tlp| tlp.as_array())
+                                    .map(|arr| {
+                                        arr.iter()
+                                            .filter_map(|v| v.get("logprob").and_then(|lp| lp.as_f64()))
+                                            .collect()
+                                    })
+                                    .unwrap_or_default();
+
+                                if hallucination_state.append_and_check_token_hallucination(content.to_string(), logprobs) {
+                                    has_hallucination = true;
+                                    break;
+                                }
+
+                                if hallucination_state.tokens.len() > 5 && has_tool_calls.is_none() {
+                                    let collected_content = hallucination_state.tokens.join("");
+                                    has_tool_calls = Some(collected_content.contains("tool_calls"));
+                                }
+                            }
+                        }
+                    }
+                }
+
+                if has_tool_calls == Some(true) && has_hallucination {
+                    info!("[Hallucination]: {}", hallucination_state.error_message);
+
+                    let clarify_messages = self.prefill_message(messages.clone(), &self.clarify_prefix);
+                    let clarify_request = self.create_request_with_extra_body(clarify_messages, false);
+
+                    let retry_response = self.make_non_streaming_request(clarify_request).await?;
+
+                    if let Some(choice) = retry_response.choices.first() {
+                        if let Some(content) = &choice.message.content {
+                            model_response = content.clone();
+                        }
+                    }
+                } else {
+                    model_response = hallucination_state.tokens.join("");
+                }
+            } else {
+                while let Some(chunk_result) = stream.next().await {
+                    let chunk = chunk_result.map_err(|e| FunctionCallingError::InvalidModelResponse(e))?;
+                    if let Some(choices) = chunk.get("choices").and_then(|v| v.as_array()) {
+                        if let Some(choice) = choices.first() {
+                            if let Some(content) = choice.get("delta")
+                                .and_then(|d| d.get("content"))
+                                .and_then(|c| c.as_str()) {
+                                model_response.push_str(content);
+                            }
+                        }
+                    }
+                }
+            }
+        }
+
+        let response_dict = self.parse_model_response(&model_response);
+
+        info!("[arch-fc]: raw model response: {}", response_dict.raw_response);
+
+        // General model response (no intent matched - should route to default target)
+        let model_message = if response_dict.response.as_ref().map_or(false, |s| !s.is_empty()) {
+            // When arch-fc returns a "response" field, it means no intent was matched
+            // Return empty content and empty tool_calls so prompt_gateway routes to default target
+            ResponseMessage {
+                role: Role::Assistant,
+                content: Some(String::new()),
+                refusal: None,
+                annotations: None,
+                audio: None,
+                function_call: None,
+                tool_calls: None,
+            }
+        } else if !response_dict.required_functions.is_empty() {
+            if !use_agent_orchestrator {
+                ResponseMessage {
+                    role: Role::Assistant,
+                    content: Some(response_dict.clarification.clone()),
+                    refusal: None,
+                    annotations: None,
+                    audio: None,
+                    function_call: None,
+                    tool_calls: None,
+                }
+            } else {
+                ResponseMessage {
+                    role: Role::Assistant,
+                    content: Some(String::new()),
+                    refusal: None,
+                    annotations: None,
+                    audio: None,
+                    function_call: None,
+                    tool_calls: None,
+                }
+            }
+        } else if !response_dict.tool_calls.is_empty() {
+            if response_dict.is_valid {
+                if !use_agent_orchestrator {
+                    if let Some(tools) = request.tools.as_ref() {
+                        let verification = self.verify_tool_calls(tools, &response_dict.tool_calls);
+
+                        if verification.is_valid {
+                            info!("[Tool calls]: {:?}",
+                                response_dict.tool_calls.iter()
+                                    .map(|tc| &tc.function)
+                                    .collect::<Vec<_>>()
+                            );
+                            ResponseMessage {
+                                role: Role::Assistant,
+                                content: Some(String::new()),
+                                refusal: None,
+                                annotations: None,
+                                audio: None,
+                                function_call: None,
+                                tool_calls: Some(response_dict.tool_calls.clone()),
+                            }
+                        } else {
+                            error!("Invalid tool call - {}", verification.error_message);
+                            ResponseMessage {
+                                role: Role::Assistant,
+                                content: Some(String::new()),
+                                refusal: None,
+                                annotations: None,
+                                audio: None,
+                                function_call: None,
+                                tool_calls: None,
+                            }
+                        }
+                    } else {
+                        error!("Tool calls present but no tools provided in request");
+                        ResponseMessage {
+                            role: Role::Assistant,
+                            content: Some(String::new()),
+                            refusal: None,
+                            annotations: None,
+                            audio: None,
+                            function_call: None,
+                            tool_calls: None,
+                        }
+                    }
+                } else {
+                    info!("[Tool calls]: {:?}",
+                        response_dict.tool_calls.iter()
+                            .map(|tc| &tc.function)
+                            .collect::<Vec<_>>()
+                    );
+                    ResponseMessage {
+                        role: Role::Assistant,
+                        content: Some(String::new()),
+                        refusal: None,
+                        annotations: None,
+                        audio: None,
+                        function_call: None,
+                        tool_calls: Some(response_dict.tool_calls.clone()),
+                    }
+                }
+            } else {
+                error!("Invalid tool calls in response: {}", response_dict.error_message);
+                ResponseMessage {
+                    role: Role::Assistant,
+                    content: Some(String::new()),
+                    refusal: None,
+                    annotations: None,
+                    audio: None,
+                    function_call: None,
+                    tool_calls: None,
+                }
+            }
+        } else {
+            error!("Invalid model response - {}", model_response);
+            ResponseMessage {
+                role: Role::Assistant,
+                content: Some(String::new()),
+                refusal: None,
+                annotations: None,
+                audio: None,
+                function_call: None,
+                tool_calls: None,
+            }
+        };
+
+        // Create metadata with the raw model response
+        let mut metadata = HashMap::new();
+        metadata.insert(
+            "x-arch-fc-model-response".to_string(),
+            serde_json::to_value(&response_dict.raw_response)
+                .unwrap_or_else(|_| Value::String(response_dict.raw_response.clone())),
+        );
+
+        let chat_completion_response = ChatCompletionsResponse {
+            id: format!("chatcmpl-{}", uuid::Uuid::new_v4()),
+            object: Some("chat.completion".to_string()),
+            created: chrono::Utc::now().timestamp() as u64,
+            model: request.model.clone(),
+            choices: vec![Choice {
+                index: 0,
+                message: model_message,
+                finish_reason: Some(FinishReason::Stop),
+                logprobs: None,
+            }],
+            usage: Usage {
+                prompt_tokens: 0,
+                completion_tokens: 0,
+                total_tokens: 0,
+                prompt_tokens_details: None,
+                completion_tokens_details: None,
+            },
+            system_fingerprint: None,
+            service_tier: None,
+            metadata: Some(metadata),
+        };
+
+        info!("[response arch-fc]: {:?}", chat_completion_response);
+
+        Ok(chat_completion_response)
+    }
+}
+
+// ============================================================================
+// ARCH AGENT HANDLER
+// ============================================================================
+
+/// Handler for Arch Agent (extends ArchFunctionHandler with specialized behavior)
+///
+/// The "extension" is done by composition: the struct owns an `ArchFunctionHandler`
+/// and layers its own `convert_tools` on top of it.
+pub struct ArchAgentHandler {
+    /// The wrapped function-calling handler. `ArchAgentHandler::new` builds it from
+    /// the values in `ArchAgentConfig::default()`.
+    pub function_handler: ArchFunctionHandler,
+}
+
+impl ArchAgentHandler {
+    /// Builds an agent handler around a freshly created `ArchFunctionHandler`.
+    ///
+    /// Prompts, generation parameters and supported data types all come from
+    /// `ArchAgentConfig::default()`; `model_name` and `endpoint_url` are handed to
+    /// `ArchFunctionHandler::new` unchanged.
+    pub fn new(model_name: String, endpoint_url: String) -> Self {
+        let agent_defaults = ArchAgentConfig::default();
+        let agent_params = agent_defaults.generation_params;
+
+        // Copy the agent's generation settings field by field into the parameter
+        // struct expected by `ArchFunctionConfig`.
+        let generation_params = GenerationParams {
+            temperature: agent_params.temperature,
+            top_p: agent_params.top_p,
+            top_k: agent_params.top_k,
+            max_tokens: agent_params.max_tokens,
+            stop_token_ids: agent_params.stop_token_ids,
+            logprobs: agent_params.logprobs,
+            top_logprobs: agent_params.top_logprobs,
+        };
+
+        let function_config = ArchFunctionConfig {
+            task_prompt: agent_defaults.task_prompt,
+            format_prompt: agent_defaults.format_prompt,
+            generation_params,
+            support_data_types: agent_defaults.support_data_types,
+        };
+
+        Self {
+            function_handler: ArchFunctionHandler::new(model_name, function_config, endpoint_url),
+        }
+    }
+
+    /// Serializes each tool's `function` definition to JSON, one definition per line.
+    ///
+    /// Special case for this handler: when a tool's `parameters.properties` is an
+    /// empty JSON object, the `properties` key is removed from (a copy of) the
+    /// parameters before serialization. The input tools are never modified.
+    ///
+    /// Returns an error as soon as one definition fails to serialize.
+    pub fn convert_tools(&self, tools: &[Tool]) -> Result<String> {
+        let serialized = tools
+            .iter()
+            .map(|tool| {
+                let mut owned = tool.clone();
+                let parameters = &mut owned.function.parameters;
+
+                // True only when `properties` exists, is an object, and has no entries.
+                let properties_empty = parameters
+                    .get("properties")
+                    .and_then(Value::as_object)
+                    .map_or(false, |props| props.is_empty());
+
+                if properties_empty {
+                    if let Some(schema) = parameters.as_object_mut() {
+                        schema.remove("properties");
+                    }
+                }
+
+                serde_json::to_string(&owned.function)
+            })
+            .collect::<std::result::Result<Vec<String>, _>>()?;
+
+        Ok(serialized.join("\n"))
+    }
+}
+
+// ============================================================================
+// HTTP HANDLER FOR FUNCTION CALLING ENDPOINT
+// ============================================================================
+
+/// Wraps a complete in-memory payload in the boxed body type used by the HTTP handlers.
+fn full<T: Into<Bytes>>(chunk: T) -> BoxBody<Bytes, hyper::Error> {
+    let payload: Bytes = chunk.into();
+    // The empty `match` shows the source error type has no values, so this mapping
+    // only adapts the error type to `hyper::Error`; it can never actually run.
+    let body = Full::new(payload).map_err(|never| match never {});
+    body.boxed()
+}
+
+/// Builds an `application/json` response with the given status and pre-serialized body.
+///
+/// Private helper for `function_calling_chat_handler` so that every exit path sets the
+/// status code and the `Content-Type` header in exactly the same way.
+fn fc_json_response(
+    status: StatusCode,
+    body: String,
+) -> Response<BoxBody<Bytes, hyper::Error>> {
+    let mut response = Response::new(full(body));
+    *response.status_mut() = status;
+    response.headers_mut().insert(
+        hyper::header::CONTENT_TYPE,
+        hyper::header::HeaderValue::from_static("application/json"),
+    );
+    response
+}
+
+/// HTTP entry point for the function-calling endpoint.
+///
+/// Steps:
+/// 1. Read the whole request body and parse it as JSON.
+/// 2. Force the `model` field to `ARCH_FUNCTION_MODEL_NAME` (any client-supplied value
+///    is overwritten), then deserialize into `ChatCompletionsRequest`.
+/// 3. Pick the handler from the boolean `metadata.use_agent_orchestrator`
+///    (missing or non-boolean counts as `false`).
+/// 4. Run `function_calling_chat` and return its result as JSON.
+///
+/// # Responses
+/// * `200` with the serialized chat completion on success.
+/// * `400` with `{"error": ...}` when the body is not valid JSON or does not match
+///   `ChatCompletionsRequest`.
+/// * `500` with `{"error": ...}` when the handler fails or the result cannot be
+///   serialized.
+///
+/// # Errors
+/// Returns `Err(hyper::Error)` only when reading the request body fails.
+pub async fn function_calling_chat_handler(
+    req: Request<Incoming>,
+    llm_provider_url: String,
+) -> std::result::Result<Response<BoxBody<Bytes, hyper::Error>>, hyper::Error> {
+    use hermesllm::apis::openai::ChatCompletionsRequest;
+
+    let whole_body = req.collect().await?.to_bytes();
+
+    // Parse as a generic JSON value first so the `model` field can be overwritten
+    // before the typed deserialization.
+    let mut body_json: Value = match serde_json::from_slice(&whole_body) {
+        Ok(json) => json,
+        Err(e) => {
+            error!("Failed to parse request body as JSON: {}", e);
+            return Ok(fc_json_response(
+                StatusCode::BAD_REQUEST,
+                serde_json::json!({
+                    "error": format!("Invalid request body: {}", e)
+                })
+                .to_string(),
+            ));
+        }
+    };
+
+    // Add "model": "Arch-Function" to the request. Non-object bodies are left as-is
+    // and are rejected by the typed parse below.
+    if let Some(obj) = body_json.as_object_mut() {
+        obj.insert("model".to_string(), ARCH_FUNCTION_MODEL_NAME.into());
+    }
+
+    let chat_request: ChatCompletionsRequest = match serde_json::from_value(body_json) {
+        Ok(req) => {
+            info!("[request body]: {}", serde_json::to_string(&req).unwrap_or_default());
+            req
+        }
+        Err(e) => {
+            error!("Failed to parse request body: {}", e);
+            return Ok(fc_json_response(
+                StatusCode::BAD_REQUEST,
+                serde_json::json!({
+                    "error": format!("Invalid request body: {}", e)
+                })
+                .to_string(),
+            ));
+        }
+    };
+
+    // Determine which handler to use based on metadata
+    let use_agent_orchestrator = chat_request
+        .metadata
+        .as_ref()
+        .and_then(|m| m.get("use_agent_orchestrator"))
+        .and_then(|v| v.as_bool())
+        .unwrap_or(false);
+
+    info!("Use agent orchestrator: {}", use_agent_orchestrator);
+
+    // Only used to label error messages.
+    let handler_name = if use_agent_orchestrator {
+        "Arch-Agent"
+    } else {
+        "Arch-Function"
+    };
+
+    // `llm_provider_url` is consumed by exactly one branch, so it is moved, not cloned.
+    let final_response = if use_agent_orchestrator {
+        let handler = ArchAgentHandler::new(
+            ARCH_FUNCTION_MODEL_NAME.to_string(),
+            llm_provider_url,
+        );
+        // NOTE(review): the agent path delegates to the inner `ArchFunctionHandler`;
+        // `ArchAgentHandler::convert_tools` is not called from here. Confirm that the
+        // agent-specific tool conversion is reached some other way if it is required.
+        handler.function_handler.function_calling_chat(chat_request).await
+    } else {
+        let handler = ArchFunctionHandler::new(
+            ARCH_FUNCTION_MODEL_NAME.to_string(),
+            ArchFunctionConfig::default(),
+            llm_provider_url,
+        );
+        handler.function_calling_chat(chat_request).await
+    };
+
+    match final_response {
+        Ok(response_data) => match serde_json::to_string(&response_data) {
+            Ok(response_json) => Ok(fc_json_response(StatusCode::OK, response_json)),
+            Err(e) => {
+                // A result that cannot be serialized is a server-side failure, so it
+                // must not be reported with a 200 status.
+                error!("Failed to serialize response: {}", e);
+                Ok(fc_json_response(
+                    StatusCode::INTERNAL_SERVER_ERROR,
+                    serde_json::json!({"error": "Failed to serialize response"}).to_string(),
+                ))
+            }
+        },
+        Err(e) => {
+            let message = format!("[{}] - Error in function calling: {}", handler_name, e);
+            error!("{}", message);
+
+            Ok(fc_json_response(
+                StatusCode::INTERNAL_SERVER_ERROR,
+                serde_json::json!({ "error": message }).to_string(),
+            ))
+        }
+    }
+}
+
+
+// ============================================================================
+// TESTS
+// ============================================================================
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    /// Shared fixture: an `ArchFunctionHandler` built from the default configuration.
+    fn default_handler() -> ArchFunctionHandler {
+        ArchFunctionHandler::new(
+            "test-model".to_string(),
+            ArchFunctionConfig::default(),
+            "http://localhost:8000".to_string(),
+        )
+    }
+
+    #[test]
+    fn test_arch_function_config_default() {
+        let defaults = ArchFunctionConfig::default();
+        let task = &defaults.task_prompt;
+        let format = &defaults.format_prompt;
+
+        assert!(task.contains("helpful assistant"));
+        assert!(format.contains("JSON formats"));
+        assert_eq!(defaults.generation_params.temperature, 0.1);
+        // 8 Python-style + 6 JSON Schema names
+        assert_eq!(defaults.support_data_types.len(), 14);
+
+        // The prompts are expected to contain the two-character sequence backslash + 'n'
+        // literally, not real newline characters.
+        for expected in ["\\n\\nYou are provided", "</tools>\\n\\n"].iter() {
+            assert!(task.contains(*expected));
+        }
+
+        // The format prompt carries literal escaped newlines and escaped JSON examples.
+        let format_snippets = [
+            "\\n\\nBased on your analysis",
+            r#"{\"response\": \"Your response text here\"}"#,
+            r#"{\"tool_calls\": [{"#,
+        ];
+        for expected in format_snippets.iter() {
+            assert!(format.contains(*expected));
+        }
+    }
+
+    #[test]
+    fn test_arch_agent_config_default() {
+        // The agent configuration uses a different temperature from ArchFunctionConfig.
+        let agent_defaults = ArchAgentConfig::default();
+        assert_eq!(agent_defaults.generation_params.temperature, 0.01);
+    }
+
+    #[test]
+    fn test_fix_json_string_valid() {
+        let handler = default_handler();
+        let outcome = handler.fix_json_string(r#"{"name": "test", "value": 123}"#);
+        assert!(outcome.is_ok());
+    }
+
+    #[test]
+    fn test_fix_json_string_missing_bracket() {
+        let handler = default_handler();
+        // Input lacks the closing brace.
+        let outcome = handler.fix_json_string(r#"{"name": "test", "value": 123"#);
+        assert!(outcome.is_ok());
+        assert!(outcome.unwrap().contains("}"));
+    }
+
+    #[test]
+    fn test_parse_model_response_with_tool_calls() {
+        let handler = default_handler();
+        let raw = r#"{"tool_calls": [{"name": "get_weather", "arguments": {"location": "NYC"}}]}"#;
+        let parsed = handler.parse_model_response(raw);
+
+        assert!(parsed.is_valid);
+        assert_eq!(parsed.tool_calls.len(), 1);
+        assert_eq!(parsed.tool_calls[0].function.name, "get_weather");
+    }
+
+    #[test]
+    fn test_parse_model_response_with_clarification() {
+        let handler = default_handler();
+        let raw = r#"{"required_functions": ["get_weather"], "clarification": "What location?"}"#;
+        let parsed = handler.parse_model_response(raw);
+
+        assert!(parsed.is_valid);
+        assert_eq!(parsed.required_functions.len(), 1);
+        assert_eq!(parsed.clarification, "What location?");
+    }
+
+    #[test]
+    fn test_convert_data_type_int_to_float() {
+        let handler = default_handler();
+        let integer = json!(42);
+        let converted = handler.convert_data_type(&integer, "float");
+        assert!(converted.is_ok());
+        assert!(converted.unwrap().is_f64());
+    }
+}
+
+// ============================================================================
+// HALLUCINATION DETECTION MODULE
+// ============================================================================
+
+/// Mask token types for tracking parsing state
+///
+/// `HallucinationState::process_token` records one of these per streamed token, so
+/// `mask[i]` classifies `tokens[i]`.
+#[derive(Debug, Clone, Copy, PartialEq, Eq)]
+pub enum MaskToken {
+    /// Token seen while the parser is in the "function_name" state.
+    FunctionName,
+    /// Token inside a parameter value that contains at least one non-punctuation character.
+    ParameterValue,
+    /// Token seen while the parser is in the "parameter_name" state.
+    ParameterName,
+    /// Any token that is not classified as one of the other kinds (also used as filler
+    /// to keep the mask the same length as the token list).
+    NotUsed,
+    /// NOTE(review): never produced by the code visible in this part of the file;
+    /// confirm whether it is used elsewhere or is a leftover.
+    ToolCall,
+}
+
+/// Uncertainty metrics calculated from log probabilities
+///
+/// Produced by `calculate_uncertainty`.
+#[derive(Debug, Clone)]
+pub struct UncertaintyMetrics {
+    /// Entropy of the supplied candidates, in bits: `-sum(p * ln p) / ln 2`.
+    pub entropy: f64,
+    /// Probability-weighted squared deviation of each candidate's `log2 p` from `-entropy`.
+    pub varentropy: f64,
+    /// Probability (`exp` of the log probability) of the first candidate in the input.
+    pub probability: f64,
+}
+
+/// Calculates uncertainty metrics from log probabilities
+///
+/// This is a simplified Rust implementation that avoids torch/tensor dependencies.
+/// Uses basic statistical calculations instead of tensor operations.
+///
+/// # Arguments
+/// * `log_probs` - natural-log probabilities of the candidate tokens at one position.
+///   The first entry is reported as `probability`; presumably callers put the
+///   sampled/top candidate first (TODO confirm against the caller).
+///
+/// # Returns
+/// * `entropy` - `-sum(p * ln p) / ln 2`, i.e. in bits.
+/// * `varentropy` - `sum(p * (log2 p + entropy)^2)`.
+/// * `probability` - `exp(log_probs[0])`.
+///
+/// An empty slice yields all-zero metrics. Candidates whose probability is exactly
+/// zero (for example a log probability of negative infinity) contribute nothing,
+/// following the usual `0 * log 0 = 0` convention; without this, `0 * -inf` would turn
+/// both metrics into NaN and silently disable every threshold comparison.
+pub fn calculate_uncertainty(log_probs: &[f64]) -> UncertaintyMetrics {
+    if log_probs.is_empty() {
+        return UncertaintyMetrics {
+            entropy: 0.0,
+            varentropy: 0.0,
+            probability: 0.0,
+        };
+    }
+
+    // Hoisted: used for every nat -> bit conversion below.
+    let ln_2 = 2_f64.ln();
+
+    // Convert log probabilities to probabilities
+    let token_probs: Vec<f64> = log_probs.iter().map(|&lp| lp.exp()).collect();
+
+    // Entropy in bits. `p != 0.0` (rather than `p > 0.0`) keeps NaN inputs visible
+    // in the result instead of hiding them.
+    let mut entropy = 0.0;
+    for (&lp, &p) in log_probs.iter().zip(token_probs.iter()) {
+        if p != 0.0 {
+            entropy -= lp * p;
+        }
+    }
+    entropy /= ln_2;
+
+    // Variance of the surprisal around the entropy.
+    let mut varentropy = 0.0;
+    for (&lp, &p) in log_probs.iter().zip(token_probs.iter()) {
+        if p != 0.0 {
+            let diff = lp / ln_2 + entropy;
+            varentropy += p * diff * diff;
+        }
+    }
+
+    // Probability of the first candidate; the slice is known to be non-empty here.
+    let probability = token_probs[0];
+
+    UncertaintyMetrics {
+        entropy,
+        varentropy,
+        probability,
+    }
+}
+
+/// Reports whether BOTH metrics are strictly above their configured thresholds.
+///
+/// Exceeding only one of the two thresholds is not enough to return `true`.
+pub fn check_threshold(
+    entropy: f64,
+    varentropy: f64,
+    thresholds: &HallucinationThresholds,
+) -> bool {
+    let entropy_exceeded = entropy > thresholds.entropy;
+    let varentropy_exceeded = varentropy > thresholds.varentropy;
+    entropy_exceeded && varentropy_exceeded
+}
+
+/// Reports whether `parameter_name` appears in the `required` array of a function's
+/// parameter schema.
+///
+/// Returns `false` when `required` is missing, is not an array, or does not contain
+/// the name as a string entry.
+pub fn is_parameter_required(
+    function_description: &Value,
+    parameter_name: &str,
+) -> bool {
+    function_description
+        .get("required")
+        .and_then(Value::as_array)
+        .map_or(false, |names| {
+            names
+                .iter()
+                .any(|entry| entry.as_str() == Some(parameter_name))
+        })
+}
+
+/// Reports whether `properties.<parameter_name>` in a function's parameter schema
+/// contains the key `property_name` (for example `enum` or `default`).
+///
+/// Returns `false` when `properties`, the parameter entry, or the key is missing.
+pub fn is_parameter_property(
+    function_description: &Value,
+    parameter_name: &str,
+    property_name: &str,
+) -> bool {
+    function_description
+        .get("properties")
+        .and_then(|all_params| all_params.get(parameter_name))
+        .and_then(|param_schema| param_schema.get(property_name))
+        .is_some()
+}
+
+/// State for hallucination detection during streaming
+///
+/// This is a simplified version of the Python HallucinationState that doesn't
+/// require torch/tensor dependencies. It provides the core functionality needed
+/// for detecting hallucinations during function calling.
+///
+/// Tokens are fed one at a time through `append_and_check_token_hallucination`;
+/// the remaining fields are parser bookkeeping updated on every token.
+#[derive(Debug)]
+pub struct HallucinationState {
+    /// Every token received so far, in arrival order.
+    pub tokens: Vec<String>,
+    /// Log probabilities received with each token; `logprobs[i]` belongs to `tokens[i]`.
+    pub logprobs: Vec<Vec<f64>>,
+    /// Current parser state: `None`, or one of "function_name", "parameter_name",
+    /// "parameter_value".
+    pub state: Option<String>,
+    /// Classification of each token; kept the same length as `tokens`.
+    pub mask: Vec<MaskToken>,
+    /// Set once a parameter name has been fully read; required before a parameter
+    /// value (or a further parameter name) can start.
+    pub parameter_name_done: bool,
+    /// Set when a checked token exceeds the uncertainty thresholds.
+    pub hallucination: bool,
+    /// Description of the most recent problem detected (empty when none).
+    pub error_message: String,
+    /// Parameter names extracted so far, in order of appearance.
+    pub parameter_name: Vec<String>,
+    /// One `(token, entropy, varentropy, probability)` entry per token whose log
+    /// probabilities were checked.
+    pub token_probs_map: Vec<(String, f64, f64, f64)>,
+    /// Function name -> that function's `parameters` schema, built from the tools
+    /// passed to `new`.
+    pub function_properties: HashMap<String, Value>,
+    /// True while inside a bracketed parameter value whose closing bracket has not
+    /// been seen yet.
+    pub open_bracket: bool,
+    /// The opening bracket character being tracked while `open_bracket` is true.
+    pub bracket: Option<char>,
+    /// Name of the function most recently extracted from the token stream.
+    pub function_name: String,
+    /// Parameter names whose value has already been uncertainty-checked, so each is
+    /// checked at most once until the next reset.
+    pub check_parameter_name: HashMap<String, bool>,
+    /// Entropy / varentropy thresholds used to flag a hallucination.
+    pub thresholds: HallucinationThresholds,
+}
+
+impl HallucinationState {
+    /// Creates a new HallucinationState with function definitions
+    ///
+    /// Each tool contributes one `function name -> parameters schema` entry to
+    /// `function_properties`. All other fields start empty/false, and the thresholds
+    /// come from `HallucinationThresholds::default()`.
+    pub fn new(functions: &[Tool]) -> Self {
+        let function_properties: HashMap<String, Value> = functions
+            .iter()
+            .map(|tool| {
+                (
+                    tool.function.name.clone(),
+                    tool.function.parameters.clone(),
+                )
+            })
+            .collect();
+
+        Self {
+            tokens: Vec::new(),
+            logprobs: Vec::new(),
+            state: None,
+            mask: Vec::new(),
+            parameter_name_done: false,
+            hallucination: false,
+            error_message: String::new(),
+            parameter_name: Vec::new(),
+            token_probs_map: Vec::new(),
+            function_properties,
+            open_bracket: false,
+            bracket: None,
+            function_name: String::new(),
+            check_parameter_name: HashMap::new(),
+            thresholds: HallucinationThresholds::default(),
+        }
+    }
+
+    /// Appends a token and checks for hallucination
+    ///
+    /// Records `token` and its `logprob` list, runs the parser on the updated token
+    /// stream, and returns the current value of the `hallucination` flag.
+    pub fn append_and_check_token_hallucination(
+        &mut self,
+        token: String,
+        logprob: Vec<f64>,
+    ) -> bool {
+        self.tokens.push(token);
+        self.logprobs.push(logprob);
+        self.process_token();
+        self.hallucination
+    }
+
+    /// Resets the per-tool-call parser flags.
+    ///
+    /// Clears the state, the bracket tracking, the hallucination flag and message, and
+    /// the set of already-checked parameters. The accumulated `tokens`, `logprobs`,
+    /// `mask`, `parameter_name`, `token_probs_map` and `function_name` are left as they are.
+    fn reset_parameters(&mut self) {
+        self.state = None;
+        self.parameter_name_done = false;
+        self.hallucination = false;
+        self.error_message.clear();
+        self.open_bracket = false;
+        self.bracket = None;
+        self.check_parameter_name.clear();
+    }
+
+    /// Processes the current token and updates state
+    ///
+    /// Runs after the newest token has been pushed onto `tokens`. The sections below
+    /// are evaluated in a fixed order and each may change `state`, so their order matters.
+    /// On exit `mask` has exactly one entry per token.
+    fn process_token(&mut self) {
+        // All tokens so far, concatenated, with space characters removed; the pattern
+        // checks below look only at how this string ends.
+        let content: String = self.tokens.join("").replace(' ', "");
+
+        // Handle end of tool call
+        if content.ends_with(END_TOOL_CALL_TOKEN) {
+            self.reset_parameters();
+        }
+
+        // Function name extraction logic
+        if self.state.as_deref() == Some("function_name") {
+            // Still inside the name unless the newest token is one of the end tokens.
+            if !FUNC_NAME_END_TOKEN.iter().any(|&t| self.tokens.last().map_or(false, |tok| tok == t)) {
+                self.mask.push(MaskToken::FunctionName);
+            } else {
+                self.state = None;
+                self.get_function_name();
+            }
+        }
+
+        // Check for function name start
+        // (Placed after the block above, so the token that completes the start pattern
+        // is not itself counted as part of the name.)
+        if FUNC_NAME_START_PATTERN.iter().any(|&p| content.ends_with(p)) {
+            self.state = Some("function_name".to_string());
+        }
+
+        // Parameter name extraction logic
+        if self.state.as_deref() == Some("parameter_name")
+            && !PARAMETER_NAME_END_TOKENS.iter().any(|&t| content.ends_with(t)) {
+            self.mask.push(MaskToken::ParameterName);
+        } else if self.state.as_deref() == Some("parameter_name")
+            && PARAMETER_NAME_END_TOKENS.iter().any(|&t| content.ends_with(t)) {
+            // Name finished: leave the state and record the collected name.
+            self.state = None;
+            self.parameter_name_done = true;
+            self.get_parameter_name();
+        } else if self.parameter_name_done
+            && !self.open_bracket
+            && PARAMETER_NAME_START_PATTERN.iter().any(|&p| content.ends_with(p)) {
+            // A further parameter name starts; ignored while inside a bracketed value.
+            self.state = Some("parameter_name".to_string());
+        }
+
+        // First parameter name start (does not require `parameter_name_done`)
+        if FIRST_PARAM_NAME_START_PATTERN.iter().any(|&p| content.ends_with(p)) {
+            self.state = Some("parameter_name".to_string());
+        }
+
+        // Parameter value extraction logic
+        if self.state.as_deref() == Some("parameter_value")
+            && !PARAMETER_VALUE_END_TOKEN.iter().any(|&t| content.ends_with(t)) {
+
+            // Check for brackets
+            if let Some(last_token) = self.tokens.last() {
+                let open_brackets: Vec<char> = last_token
+                    .trim()
+                    .chars()
+                    .filter(|&c| c == '(' || c == '{' || c == '[')
+                    .collect();
+
+                // Track the first opening bracket found in this token.
+                if !open_brackets.is_empty() {
+                    self.open_bracket = true;
+                    self.bracket = Some(open_brackets[0]);
+                }
+
+                // Close the tracked bracket once its matching closer appears
+                // (possibly in the same token that opened it).
+                if self.open_bracket {
+                    let closing = match self.bracket {
+                        Some('(') => ')',
+                        Some('{') => '}',
+                        Some('[') => ']',
+                        _ => '\0',
+                    };
+                    if last_token.trim().contains(closing) {
+                        self.open_bracket = false;
+                        self.bracket = None;
+                    }
+                }
+
+                // Check if token has actual value content
+                let has_non_punct = last_token.trim().chars().any(|c| !c.is_ascii_punctuation());
+                if has_non_punct && !last_token.trim().is_empty() {
+                    self.mask.push(MaskToken::ParameterValue);
+
+                    // Check hallucination for required parameters without enum
+                    if self.function_properties.contains_key(&self.function_name) {
+                        // Only when the previous mask entry is not a value token, i.e.
+                        // this is the first content token of the value.
+                        if self.mask.len() > 1
+                            && self.mask[self.mask.len() - 2] != MaskToken::ParameterValue
+                            && !self.parameter_name.is_empty()
+                        {
+                            let last_param = self.parameter_name[self.parameter_name.len() - 1].clone();
+                            if let Some(func_props) = self.function_properties.get(&self.function_name) {
+                                if is_parameter_required(func_props, &last_param)
+                                    && !is_parameter_property(func_props, &last_param, "enum")
+                                    && !self.check_parameter_name.contains_key(&last_param)
+                                {
+                                    self.check_logprob();
+                                    // Remember the parameter so it is checked only once.
+                                    self.check_parameter_name.insert(last_param, true);
+                                }
+                            }
+                        }
+                    } else if !self.function_name.is_empty() {
+                        // Unknown function: still run the uncertainty check, then
+                        // replace whatever message it set with the "not found" message.
+                        self.check_logprob();
+                        self.error_message = format!(
+                            "Function name {} not found in function properties",
+                            self.function_name
+                        );
+                    }
+                } else {
+                    self.mask.push(MaskToken::NotUsed);
+                }
+            }
+        } else if self.state.as_deref() == Some("parameter_value")
+            && !self.open_bracket
+            && PARAMETER_VALUE_END_TOKEN.iter().any(|&t| content.ends_with(t)) {
+            // Value finished (an end token inside an open bracket does not count).
+            self.state = None;
+        } else if self.parameter_name_done
+            && PARAMETER_VALUE_START_PATTERN.iter().any(|&p| content.ends_with(p)) {
+            self.state = Some("parameter_value".to_string());
+        }
+
+        // Maintain consistency between tokens and mask
+        // (covers every path above that did not push a mask entry for this token)
+        if self.mask.len() != self.tokens.len() {
+            self.mask.push(MaskToken::NotUsed);
+        }
+    }
+
+    /// Checks log probability and detects hallucination
+    ///
+    /// Computes the uncertainty metrics for the newest token's log probabilities,
+    /// appends them to `token_probs_map`, and, when both metrics exceed the
+    /// thresholds, sets `hallucination` and `error_message`. Does nothing if no token
+    /// or log-probability entry exists yet.
+    fn check_logprob(&mut self) {
+        if let Some(probs) = self.logprobs.last() {
+            let metrics = calculate_uncertainty(probs);
+
+            if let Some(token) = self.tokens.last() {
+                self.token_probs_map.push((
+                    token.clone(),
+                    metrics.entropy,
+                    metrics.varentropy,
+                    metrics.probability,
+                ));
+
+                if check_threshold(metrics.entropy, metrics.varentropy, &self.thresholds) {
+                    self.hallucination = true;
+                    self.error_message = format!(
+                        "token '{}' is uncertain. Generated response:\n{}",
+                        token,
+                        self.tokens.join("")
+                    );
+                }
+            }
+        }
+    }
+
+    /// Counts consecutive tokens of a specific type in the mask
+    ///
+    /// Returns the length of the run of `token_type` entries at the END of `mask`,
+    /// or 0 when the mask is empty or its last entry has a different type.
+    fn count_consecutive_token(&self, token_type: MaskToken) -> usize {
+        if self.mask.is_empty() || self.mask.last() != Some(&token_type) {
+            return 0;
+        }
+
+        self.mask
+            .iter()
+            .rev()
+            .take_while(|&&t| t == token_type)
+            .count()
+    }
+
+    /// Extracts the parameter name from recent tokens
+    ///
+    /// Joins the tokens covered by the trailing run of `ParameterName` mask entries
+    /// and appends the result to `parameter_name`. The newest token is excluded from
+    /// the slice; presumably it is the terminator that has no mask entry yet when
+    /// this is called from `process_token` (TODO confirm).
+    fn get_parameter_name(&mut self) {
+        let p_len = self.count_consecutive_token(MaskToken::ParameterName);
+        if p_len > 0 && self.tokens.len() > 1 {
+            let start_idx = self.tokens.len().saturating_sub(p_len + 1);
+            let end_idx = self.tokens.len().saturating_sub(1);
+            let parameter_name: String = self.tokens[start_idx..end_idx].join("");
+            self.parameter_name.push(parameter_name);
+        }
+    }
+
+    /// Extracts the function name from recent tokens
+    ///
+    /// Joins the tokens covered by the trailing run of `FunctionName` mask entries
+    /// (excluding the newest token) and stores the result in `function_name`.
+    /// Leaves `function_name` untouched when there is no such run.
+    fn get_function_name(&mut self) {
+        let f_len = self.count_consecutive_token(MaskToken::FunctionName);
+        if f_len > 0 && self.tokens.len() > 1 {
+            let start_idx = self.tokens.len().saturating_sub(f_len + 1);
+            let end_idx = self.tokens.len().saturating_sub(1);
+            self.function_name = self.tokens[start_idx..end_idx].join("");
+        }
+    }
+}
+
+#[cfg(test)]
+mod hallucination_tests { // unit tests for the hallucination-detection helpers and parameter type checks
+    use super::*;
+
+    #[test]
+    fn test_calculate_uncertainty() {
+        let log_probs = vec![-0.1, -2.0, -3.0];
+        let metrics = calculate_uncertainty(&log_probs);
+        assert!(metrics.entropy >= 0.0);
+        assert!(metrics.varentropy >= 0.0);
+        assert!(metrics.probability > 0.0 && metrics.probability <= 1.0);
+    }
+
+    #[test]
+    fn test_calculate_uncertainty_empty() { // empty input must yield all-zero metrics
+        let log_probs: Vec<f64> = vec![];
+        let metrics = calculate_uncertainty(&log_probs);
+        assert_eq!(metrics.entropy, 0.0);
+        assert_eq!(metrics.varentropy, 0.0);
+        assert_eq!(metrics.probability, 0.0);
+    }
+
+    #[test]
+    fn test_check_threshold() { // 1e-3 trips the default thresholds, 1e-5 does not
+        let thresholds = HallucinationThresholds::default();
+        assert!(check_threshold(0.001, 0.001, &thresholds));
+        assert!(!check_threshold(0.00001, 0.00001, &thresholds));
+    }
+
+    #[test]
+    fn test_is_parameter_required() {
+        let func_desc = json!({
+            "required": ["param1", "param2"]
+        });
+        assert!(is_parameter_required(&func_desc, "param1"));
+        assert!(!is_parameter_required(&func_desc, "param3"));
+    }
+
+    #[test]
+    fn test_is_parameter_property() {
+        let func_desc = json!({
+            "properties": {
+                "param1": {
+                    "type": "string",
+                    "enum": ["a", "b"]
+                }
+            }
+        });
+        assert!(is_parameter_property(&func_desc, "param1", "enum"));
+        assert!(!is_parameter_property(&func_desc, "param1", "default"));
+    }
+
+    #[test]
+    fn test_check_value_type() {
+        let handler = ArchFunctionHandler::new(
+            "test-model".to_string(),
+            ArchFunctionConfig::default(),
+            "http://localhost:8000".to_string()
+        );
+
+        // Test integer types (2.5 is a non-integral float; 3.14 would trip clippy::approx_constant)
+        assert!(handler.check_value_type(&json!(42), "integer"));
+        assert!(handler.check_value_type(&json!(42), "int"));
+        assert!(!handler.check_value_type(&json!(2.5), "integer"));
+
+        // Test number types (accepts both int and float)
+        assert!(handler.check_value_type(&json!(2.5), "number"));
+        assert!(handler.check_value_type(&json!(42), "number"));
+        assert!(handler.check_value_type(&json!(2.5), "float"));
+
+        // Test boolean
+        assert!(handler.check_value_type(&json!(true), "boolean"));
+        assert!(handler.check_value_type(&json!(false), "bool"));
+        assert!(!handler.check_value_type(&json!("true"), "boolean"));
+
+        // Test string
+        assert!(handler.check_value_type(&json!("hello"), "string"));
+        assert!(handler.check_value_type(&json!("hello"), "str"));
+        assert!(!handler.check_value_type(&json!(123), "string"));
+
+        // Test array
+        assert!(handler.check_value_type(&json!([1, 2, 3]), "array"));
+        assert!(handler.check_value_type(&json!([1, 2, 3]), "list"));
+        assert!(!handler.check_value_type(&json!({}), "array"));
+
+        // Test object
+        assert!(handler.check_value_type(&json!({"key": "value"}), "object"));
+        assert!(handler.check_value_type(&json!({"key": "value"}), "dict"));
+        assert!(!handler.check_value_type(&json!([]), "object"));
+
+        // Test unknown type (should return true)
+        assert!(handler.check_value_type(&json!(42), "unknown_type"));
+    }
+
+    #[test]
+    fn test_validate_or_convert_parameter() {
+        let handler = ArchFunctionHandler::new(
+            "test-model".to_string(),
+            ArchFunctionConfig::default(),
+            "http://localhost:8000".to_string()
+        );
+
+        // Test valid type - no conversion needed
+        assert!(handler.validate_or_convert_parameter(&json!(42), "integer").unwrap());
+        assert!(handler.validate_or_convert_parameter(&json!("hello"), "string").unwrap());
+
+        // Test integer to float conversion (convert_data_type supports this)
+        let result = handler.validate_or_convert_parameter(&json!(42), "float");
+        assert!(result.is_ok());
+        assert!(result.unwrap()); // Should be valid after conversion
+
+        // Test invalid type that cannot be converted
+        // A string cannot be converted to integer (convert_data_type doesn't support this)
+        let result = handler.validate_or_convert_parameter(&json!("abc"), "integer");
+        // Since convert_data_type returns Ok(value.clone()) for unsupported conversions,
+        // the validation will fail because "abc" string is not an integer
+        assert!(!result.unwrap());
+
+        // Test number accepting both int and float
+        assert!(handler.validate_or_convert_parameter(&json!(42), "number").unwrap());
+        assert!(handler.validate_or_convert_parameter(&json!(2.5), "number").unwrap());
+    }
+
+    #[test]
+    fn test_hallucination_state_new() { // a fresh state has no tokens, no flag, and indexes tools by name
+        let tools = vec![Tool {
+            tool_type: "function".to_string(),
+            function: hermesllm::apis::openai::Function {
+                name: "test_func".to_string(),
+                description: Some("Test function".to_string()),
+                parameters: json!({"type": "object"}),
+                strict: None,
+            },
+        }];
+
+        let state = HallucinationState::new(&tools);
+        assert_eq!(state.tokens.len(), 0);
+        assert!(!state.hallucination);
+        assert!(state.function_properties.contains_key("test_func"));
+    }
+}
diff --git a/crates/brightstaff/src/handlers/mod.rs b/crates/brightstaff/src/handlers/mod.rs
index 66c5449b..2583b41e 100644
--- a/crates/brightstaff/src/handlers/mod.rs
+++ b/crates/brightstaff/src/handlers/mod.rs
@@ -1,9 +1,11 @@
 pub mod agent_chat_completions;
 pub mod agent_selector;
-pub mod chat_completions;
+pub mod router;
 pub mod models;
+pub mod function_calling;
 pub mod pipeline_processor;
 pub mod response_handler;
+pub mod utils;
 
 #[cfg(test)]
 mod integration_tests;
diff --git a/crates/brightstaff/src/handlers/chat_completions.rs b/crates/brightstaff/src/handlers/router.rs
similarity index 89%
rename from crates/brightstaff/src/handlers/chat_completions.rs
rename to crates/brightstaff/src/handlers/router.rs
index 1b15e389..d27bab55 100644
--- a/crates/brightstaff/src/handlers/chat_completions.rs
+++ b/crates/brightstaff/src/handlers/router.rs
@@ -6,18 +6,15 @@ use hermesllm::clients::endpoints::SupportedUpstreamAPIs;
 use hermesllm::clients::SupportedAPIs;
 use hermesllm::{ProviderRequest, ProviderRequestType};
 use http_body_util::combinators::BoxBody;
-use http_body_util::{BodyExt, Full, StreamBody};
-use hyper::body::Frame;
+use http_body_util::{BodyExt, Full};
 use hyper::header::{self};
 use hyper::{Request, Response, StatusCode};
 use std::collections::HashMap;
 use std::sync::Arc;
-use tokio::sync::mpsc;
-use tokio_stream::wrappers::ReceiverStream;
-use tokio_stream::StreamExt;
 use tracing::{debug, info, warn};
 
 use crate::router::llm_router::RouterService;
+use crate::handlers::utils::{create_streaming_response, PassthroughProcessor};
 
 fn full<T: Into<Bytes>>(chunk: T) -> BoxBody<Bytes, hyper::Error> {
     Full::new(chunk.into())
@@ -25,7 +22,7 @@ fn full<T: Into<Bytes>>(chunk: T) -> BoxBody<Bytes, hyper::Error> {
         .boxed()
 }
 
-pub async fn chat(
+pub async fn router_chat(
     request: Request<hyper::body::Incoming>,
     router_service: Arc<RouterService>,
     full_qualified_llm_provider_url: String,
@@ -237,34 +234,12 @@ pub async fn chat(
         headers.insert(header_name, header_value.clone());
     }
 
-    // channel to create async stream
-    let (tx, rx) = mpsc::channel::<Bytes>(16);
+    // Use the streaming utility with a passthrough processor (no modification of chunks)
+    let byte_stream = llm_response.bytes_stream();
+    let processor = PassthroughProcessor;
+    let streaming_response = create_streaming_response(byte_stream, processor, 16);
 
-    // Spawn a task to send data as it becomes available
-    tokio::spawn(async move {
-        let mut byte_stream = llm_response.bytes_stream();
-
-        while let Some(item) = byte_stream.next().await {
-            let item = match item {
-                Ok(item) => item,
-                Err(err) => {
-                    warn!("Error receiving chunk: {:?}", err);
-                    break;
-                }
-            };
-
-            if tx.send(item).await.is_err() {
-                warn!("Receiver dropped");
-                break;
-            }
-        }
-    });
-
-    let stream = ReceiverStream::new(rx).map(|chunk| Ok::<_, hyper::Error>(Frame::data(chunk)));
-
-    let stream_body = BoxBody::new(StreamBody::new(stream));
-
-    match response.body(stream_body) {
+    match response.body(streaming_response.body) {
         Ok(response) => Ok(response),
         Err(err) => {
             let err_msg = format!("Failed to create response: {}", err);
diff --git a/crates/brightstaff/src/handlers/utils.rs b/crates/brightstaff/src/handlers/utils.rs
new file mode 100644
index 00000000..2d000874
--- /dev/null
+++ b/crates/brightstaff/src/handlers/utils.rs
@@ -0,0 +1,93 @@
+use bytes::Bytes;
+use http_body_util::combinators::BoxBody;
+use http_body_util::StreamBody;
+use hyper::body::Frame;
+use tokio::sync::mpsc;
+use tokio_stream::wrappers::ReceiverStream;
+use tokio_stream::StreamExt;
+use tracing::warn;
+
+/// Trait for processing streaming chunks before they are forwarded to the client
+/// Implementors can inject custom logic during streaming (e.g., hallucination detection, logging)
+pub trait StreamProcessor: Send + 'static {
+    /// Process an incoming chunk: `Ok(Some(bytes))` is forwarded, `Ok(None)` skips the chunk, `Err` stops the stream
+    fn process_chunk(&mut self, chunk: Bytes) -> Result<Option<Bytes>, String>;
+
+    /// Called when streaming completes successfully (default: no-op)
+    fn on_complete(&mut self) {}
+
+    /// Called with a description of the failure when streaming encounters an error (default: no-op)
+    fn on_error(&mut self, _error: &str) {}
+}
+
+/// A no-op processor that just forwards chunks as-is; `on_complete`/`on_error` keep their default bodies
+pub struct PassthroughProcessor;
+
+impl StreamProcessor for PassthroughProcessor {
+    fn process_chunk(&mut self, chunk: Bytes) -> Result<Option<Bytes>, String> {
+        Ok(Some(chunk)) // never skips a chunk and never reports an error
+    }
+}
+
+/// Result of creating a streaming response (the value returned by `create_streaming_response`)
+pub struct StreamingResponse {
+    pub body: BoxBody<Bytes, hyper::Error>, // HTTP body that yields each forwarded chunk as a data frame
+    pub processor_handle: tokio::task::JoinHandle<()>, // handle of the spawned task that feeds `body`
+}
+
+/// Pipes `byte_stream` through `processor` on a spawned task and returns the output as an HTTP body.
+/// Chunks travel over a bounded channel of capacity `buffer_size`. Once `on_error` has been reported
+/// the task stops without calling `on_complete`, so a failed stream is never signalled as complete.
+pub fn create_streaming_response<S, P>(
+    mut byte_stream: S,
+    mut processor: P,
+    buffer_size: usize,
+) -> StreamingResponse
+where
+    S: StreamExt<Item = Result<Bytes, reqwest::Error>> + Send + Unpin + 'static,
+    P: StreamProcessor,
+{
+    let (tx, rx) = mpsc::channel::<Bytes>(buffer_size);
+
+    // Spawn a task to process and forward chunks
+    let processor_handle = tokio::spawn(async move {
+        while let Some(item) = byte_stream.next().await {
+            let chunk = match item {
+                Ok(chunk) => chunk,
+                Err(err) => {
+                    let err_msg = format!("Error receiving chunk: {:?}", err);
+                    warn!("{}", err_msg);
+                    processor.on_error(&err_msg);
+                    return; // failure already reported: leave the task without on_complete
+                }
+            };
+
+            match processor.process_chunk(chunk) {
+                Ok(Some(processed_chunk)) => {
+                    if tx.send(processed_chunk).await.is_err() {
+                        warn!("Receiver dropped");
+                        break;
+                    }
+                }
+                Ok(None) => continue, // the processor chose to skip this chunk
+                Err(err) => {
+                    warn!("Processor error: {}", err);
+                    processor.on_error(&err);
+                    return; // failure already reported: leave the task without on_complete
+                }
+            }
+        }
+
+        // Reached when the upstream ends or the receiver is dropped, never after `on_error`.
+        processor.on_complete();
+    });
+
+    // Convert channel receiver to HTTP stream
+    let stream = ReceiverStream::new(rx).map(|chunk| Ok::<_, hyper::Error>(Frame::data(chunk)));
+    let stream_body = BoxBody::new(StreamBody::new(stream));
+
+    StreamingResponse {
+        body: stream_body,
+        processor_handle,
+    }
+}
diff --git a/crates/brightstaff/src/main.rs b/crates/brightstaff/src/main.rs
index 57dd9fe9..87bdea36 100644
--- a/crates/brightstaff/src/main.rs
+++ b/crates/brightstaff/src/main.rs
@@ -1,6 +1,7 @@
 use brightstaff::handlers::agent_chat_completions::agent_chat;
-use brightstaff::handlers::chat_completions::chat;
+use brightstaff::handlers::router::router_chat;
 use brightstaff::handlers::models::list_models;
+use brightstaff::handlers::function_calling::{function_calling_chat_handler};
 use brightstaff::router::llm_router::RouterService;
 use brightstaff::utils::tracing::init_tracer;
 use bytes::Bytes;
@@ -125,7 +126,7 @@ async fn main() -> Result<(), Box<dyn std::error::Error + Send + Sync>> {
                     (&Method::POST, CHAT_COMPLETIONS_PATH | MESSAGES_PATH) => {
                         let fully_qualified_url =
                             format!("{}{}", llm_provider_url, req.uri().path());
-                        chat(req, router_service, fully_qualified_url, model_aliases)
+                        router_chat(req, router_service, fully_qualified_url, model_aliases)
                             .with_context(parent_cx)
                             .await
                     }
@@ -142,6 +143,14 @@ async fn main() -> Result<(), Box<dyn std::error::Error + Send + Sync>> {
                         .with_context(parent_cx)
                         .await
                     }
+
+                    (&Method::POST, "/function_calling") => {
+                        let fully_qualified_url =
+                            format!("{}{}", llm_provider_url, "/v1/chat/completions");
+                        function_calling_chat_handler(req, fully_qualified_url)
+                            .with_context(parent_cx)
+                            .await
+                    }
                     (&Method::GET, "/v1/models" | "/agents/v1/models") => {
                         Ok(list_models(llm_providers).await)
                     }
diff --git a/crates/common/src/api/open_ai.rs b/crates/common/src/api/open_ai.rs
index 080923c1..951bfaf5 100644
--- a/crates/common/src/api/open_ai.rs
+++ b/crates/common/src/api/open_ai.rs
@@ -4,7 +4,6 @@ use crate::{
 };
 use core::{panic, str};
 use serde::{ser::SerializeMap, Deserialize, Serialize};
-use serde_yaml::Value;
 use std::{
     collections::{HashMap, VecDeque},
     fmt::Display,
@@ -265,7 +264,7 @@ pub struct ToolCall {
 #[derive(Debug, Clone, Serialize, Deserialize)]
 pub struct FunctionCallDetail {
     pub name: String,
-    pub arguments: Option<HashMap<String, Value>>,
+    pub arguments: String,
 }
 
 #[derive(Debug, Deserialize, Serialize)]
diff --git a/crates/common/src/consts.rs b/crates/common/src/consts.rs
index 13624d8d..8edbff1a 100644
--- a/crates/common/src/consts.rs
+++ b/crates/common/src/consts.rs
@@ -7,7 +7,7 @@ pub const ARCH_FC_REQUEST_TIMEOUT_MS: u64 = 30000; // 30 seconds
 pub const DEFAULT_TARGET_REQUEST_TIMEOUT_MS: u64 = 30000; // 30 seconds
 pub const API_REQUEST_TIMEOUT_MS: u64 = 30000; // 30 seconds
 pub const MODEL_SERVER_REQUEST_TIMEOUT_MS: u64 = 30000; // 30 seconds
-pub const MODEL_SERVER_NAME: &str = "model_server";
+pub const MODEL_SERVER_NAME: &str = "bright_staff";
 pub const ARCH_ROUTING_HEADER: &str = "x-arch-llm-provider";
 pub const MESSAGES_KEY: &str = "messages";
 pub const ARCH_PROVIDER_HINT_HEADER: &str = "x-arch-llm-provider-hint";
diff --git a/crates/common/src/routing.rs b/crates/common/src/routing.rs
index f4baf896..8813c92d 100644
--- a/crates/common/src/routing.rs
+++ b/crates/common/src/routing.rs
@@ -40,8 +40,14 @@ pub fn get_llm_provider(
     let mut rng = thread_rng();
     llm_providers
         .iter()
+        .filter(|(_, provider)| {
+            provider.model
+                .as_ref()
+                .map(|m| !m.starts_with("Arch"))
+                .unwrap_or(true)
+        })
         .choose(&mut rng)
-        .expect("There should always be at least one llm provider")
+        .expect("There should always be at least one non-Arch llm provider")
         .1
         .clone()
 }
diff --git a/crates/hermesllm/src/apis/openai.rs b/crates/hermesllm/src/apis/openai.rs
index 82c5d1a1..44b64485 100644
--- a/crates/hermesllm/src/apis/openai.rs
+++ b/crates/hermesllm/src/apis/openai.rs
@@ -101,6 +101,12 @@ pub struct ChatCompletionsRequest {
     pub top_logprobs: Option<u32>,
     pub user: Option<String>,
     // pub web_search: Option<bool>, // GOOD FIRST ISSUE: Future support for web search
+
+    // VLLM-specific parameters (used by Arch-Function)
+    pub top_k: Option<u32>,
+    pub stop_token_ids: Option<Vec<u32>>,
+    pub continue_final_message: Option<bool>,
+    pub add_generation_prompt: Option<bool>,
 }
 
 impl ChatCompletionsRequest {
@@ -385,6 +391,8 @@ pub struct ChatCompletionsResponse {
     pub usage: Usage,
     pub system_fingerprint: Option<String>,
     pub service_tier: Option<String>,
+    // This isn't a standard OpenAI field, but we include it for extensibility
+    pub metadata: Option<HashMap<String, Value>>,
 }
 
 impl Default for ChatCompletionsResponse {
@@ -398,6 +406,7 @@ impl Default for ChatCompletionsResponse {
             usage: Usage::default(),
             system_fingerprint: None,
             service_tier: None,
+            metadata: None,
         }
     }
 }
diff --git a/crates/hermesllm/src/providers/response.rs b/crates/hermesllm/src/providers/response.rs
index f09b2c04..54fda8c4 100644
--- a/crates/hermesllm/src/providers/response.rs
+++ b/crates/hermesllm/src/providers/response.rs
@@ -316,6 +316,17 @@ impl TryFrom<(SseEvent, &SupportedAPIs, &SupportedUpstreamAPIs)> for SseEvent {
         // Create a new transformed event based on the original
         let mut transformed_event = sse_event;
 
+        // Handle [DONE] marker early - don't try to parse as JSON
+        if transformed_event.is_done() {
+            // For OpenAI client API, keep [DONE] as-is
+            // For Anthropic client API, it will be transformed via ProviderStreamResponseType
+            if matches!(client_api, SupportedAPIs::OpenAIChatCompletions(_)) {
+                // Keep the [DONE] marker as-is for OpenAI clients
+                transformed_event.sse_transform_buffer = "data: [DONE]".to_string();
+                return Ok(transformed_event);
+            }
+        }
+
         // If has data, parse the data as a provider stream response (business logic layer)
         if transformed_event.data.is_some() {
             let data_str = transformed_event.data.as_ref().unwrap();
diff --git a/crates/hermesllm/src/transforms/response/to_openai.rs b/crates/hermesllm/src/transforms/response/to_openai.rs
index acbdb420..b44afc96 100644
--- a/crates/hermesllm/src/transforms/response/to_openai.rs
+++ b/crates/hermesllm/src/transforms/response/to_openai.rs
@@ -83,8 +83,7 @@ impl TryFrom<MessagesResponse> for ChatCompletionsResponse {
             model: resp.model,
             choices: vec![choice],
             usage,
-            system_fingerprint: None,
-            service_tier: None,
+            ..Default::default()
         })
     }
 }
@@ -169,8 +168,7 @@ impl TryFrom<ConverseResponse> for ChatCompletionsResponse {
             model,
             choices: vec![choice],
             usage,
-            system_fingerprint: None,
-            service_tier: None,
+            ..Default::default()
         })
     }
 }
diff --git a/crates/prompt_gateway/src/stream_context.rs b/crates/prompt_gateway/src/stream_context.rs
index 96caa378..1e9d507b 100644
--- a/crates/prompt_gateway/src/stream_context.rs
+++ b/crates/prompt_gateway/src/stream_context.rs
@@ -264,13 +264,6 @@ impl StreamContext {
             .tool_calls
             .clone_into(&mut self.tool_calls);
 
-        if self.tool_calls.as_ref().unwrap().len() > 1 {
-            warn!(
-                "multiple tool calls not supported yet, tool_calls count found: {}",
-                self.tool_calls.as_ref().unwrap().len()
-            );
-        }
-
         if self.tool_calls.is_none() || self.tool_calls.as_ref().unwrap().is_empty() {
             // This means that Arch FC did not have enough information to resolve the function call
             // Arch FC probably responded with a message asking for more information.
@@ -314,6 +307,14 @@ impl StreamContext {
             );
         }
 
+        // At this point, we know tool_calls is not None and not empty
+        if self.tool_calls.as_ref().unwrap().len() > 1 {
+            warn!(
+                "multiple tool calls not supported yet, tool_calls count found: {}",
+                self.tool_calls.as_ref().unwrap().len()
+            );
+        }
+
         // update prompt target name from the tool call response
         callout_context.prompt_target_name =
             Some(self.tool_calls.as_ref().unwrap()[0].function.name.clone());
@@ -371,7 +372,26 @@ impl StreamContext {
 
         let tools_call_name = self.tool_calls.as_ref().unwrap()[0].function.name.clone();
         let prompt_target = self.prompt_targets.get(&tools_call_name).unwrap().clone();
-        let tool_params = &self.tool_calls.as_ref().unwrap()[0].function.arguments;
+        let tool_params_str = &self.tool_calls.as_ref().unwrap()[0].function.arguments;
+
+        // Parse arguments JSON string into HashMap
+        // Note: convert from serde_json::Value to serde_yaml::Value for compatibility
+        let tool_params: Option<HashMap<String, serde_yaml::Value>> = match serde_json::from_str::<HashMap<String, serde_json::Value>>(tool_params_str) {
+            Ok(json_params) => {
+                let yaml_params: HashMap<String, serde_yaml::Value> = json_params
+                    .into_iter()
+                    .filter_map(|(k, v)| {
+                        serde_yaml::to_value(&v).ok().map(|yaml_v| (k, yaml_v))
+                    })
+                    .collect();
+                Some(yaml_params)
+            },
+            Err(e) => {
+                warn!("Failed to parse tool call arguments: {}", e);
+                None
+            }
+        };
+
         let endpoint_details = prompt_target.endpoint.as_ref().unwrap();
         let endpoint_path: String = endpoint_details
             .path
@@ -384,7 +404,7 @@ impl StreamContext {
 
         let (path, api_call_body) = match compute_request_path_body(
             &endpoint_path,
-            tool_params,
+            &tool_params,
             &prompt_target_params,
             &http_method,
         ) {
@@ -870,7 +890,7 @@ mod test {
                         id: "1".to_string(),
                         function: common::api::open_ai::FunctionCallDetail {
                             name: "test".to_string(),
-                            arguments: None,
+                            arguments: "{}".to_string(),
                         },
                         tool_type: common::api::open_ai::ToolType::Function,
                     }]),
diff --git a/demos/samples_python/human_resources_agent/requirements.txt b/demos/samples_python/human_resources_agent/requirements.txt
index 9a108c37..aaaff081 100644
--- a/demos/samples_python/human_resources_agent/requirements.txt
+++ b/demos/samples_python/human_resources_agent/requirements.txt
@@ -4,6 +4,7 @@ slack-sdk
 typing
 pandas
 gradio==5.3.0
+huggingface_hub<1.0.0
 async_timeout==4.0.3
 loguru==0.7.2
 asyncio==3.4.3
diff --git a/demos/samples_python/multi_turn_rag_agent/requirements.txt b/demos/samples_python/multi_turn_rag_agent/requirements.txt
index a555b460..d6a88e83 100644
--- a/demos/samples_python/multi_turn_rag_agent/requirements.txt
+++ b/demos/samples_python/multi_turn_rag_agent/requirements.txt
@@ -3,6 +3,7 @@ uvicorn
 typing
 pandas
 gradio==5.3.0
+huggingface_hub<1.0.0
 async_timeout==4.0.3
 loguru==0.7.2
 asyncio==3.4.3
diff --git a/demos/samples_python/network_switch_operator_agent/requirements.txt b/demos/samples_python/network_switch_operator_agent/requirements.txt
index 8aa2003a..52913a01 100644
--- a/demos/samples_python/network_switch_operator_agent/requirements.txt
+++ b/demos/samples_python/network_switch_operator_agent/requirements.txt
@@ -4,6 +4,7 @@ pydantic
 typing
 pandas
 gradio==5.3.0
+huggingface_hub<1.0.0
 async_timeout==4.0.3
 loguru==0.7.2
 asyncio==3.4.3
diff --git a/demos/shared/chatbot_ui/requirements.txt b/demos/shared/chatbot_ui/requirements.txt
index da4ac00b..7d94088a 100644
--- a/demos/shared/chatbot_ui/requirements.txt
+++ b/demos/shared/chatbot_ui/requirements.txt
@@ -1,4 +1,5 @@
 gradio==5.3.0
+huggingface_hub<1.0.0
 async_timeout==4.0.3
 loguru==0.7.2
 asyncio==3.4.3
diff --git a/demos/use_cases/orchestrating_agents/Dockerfile b/demos/use_cases/orchestrating_agents/Dockerfile
deleted file mode 100644
index b53cb719..00000000
--- a/demos/use_cases/orchestrating_agents/Dockerfile
+++ /dev/null
@@ -1,41 +0,0 @@
-# took inspiration from https://medium.com/@albertazzir/blazing-fast-python-docker-builds-with-poetry-a78a66f5aed0
-
-# The builder image, used to build the virtual environment
-FROM python:3.10 as builder
-
-RUN pip install poetry==1.8.3
-
-ENV POETRY_NO_INTERACTION=1 \
-    POETRY_VIRTUALENVS_IN_PROJECT=1 \
-    POETRY_VIRTUALENVS_CREATE=1 \
-    POETRY_CACHE_DIR=/tmp/poetry_cache
-
-WORKDIR /code
-
-COPY pyproject.toml poetry.lock ./
-RUN touch README.md
-
-RUN poetry install --no-root && rm -rf $POETRY_CACHE_DIR
-
-# The runtime image, used to just run the code provided its virtual environment
-FROM python:3.10-slim as runtime
-
-RUN apt-get update && apt-get install -y curl
-
-WORKDIR /code
-
-ENV VIRTUAL_ENV=/code/.venv \
-    PATH="/code/.venv/bin:$PATH"
-
-COPY --from=builder ${VIRTUAL_ENV} ${VIRTUAL_ENV}
-
-COPY main.py ./
-
-HEALTHCHECK \
-    --interval=5s \
-    --timeout=1s \
-    --start-period=1s \
-    --retries=3 \
-    CMD curl http://localhost:80/healthz
-
-CMD ["uvicorn", "main:app", "--host", "0.0.0.0", "--port", "80", "--log-level", "debug"]
diff --git a/demos/use_cases/orchestrating_agents/arch_config.yaml b/demos/use_cases/orchestrating_agents/arch_config.yaml
deleted file mode 100644
index b52ceb22..00000000
--- a/demos/use_cases/orchestrating_agents/arch_config.yaml
+++ /dev/null
@@ -1,44 +0,0 @@
-version: v0.1.0
-
-listeners:
-  ingress_traffic:
-    address: 0.0.0.0
-    port: 10000
-    message_format: openai
-    timeout: 30s
-
-  egress_traffic:
-    address: 0.0.0.0
-    port: 12000
-    message_format: openai
-    timeout: 30s
-
-overrides:
-  use_agent_orchestrator: true
-
-endpoints:
-  agent_gateway:
-    endpoint: host.docker.internal:18083
-    connect_timeout: 0.005s
-
-llm_providers:
-  - access_key: $OPENAI_API_KEY
-    model: openai/gpt-4o-mini
-    default: true
-
-system_prompt: |
-  You are a helpful assistant.
-
-prompt_targets:
-  - name: sales_agent
-    description: handles queries related to sales and purchases
-
-  - name: issues_and_repairs
-    description: handles issues, repairs, or refunds
-
-  - name: escalate_to_human
-    description: escalates to human agent
-
-tracing:
-  random_sampling: 100
-  trace_arch_internal: true
diff --git a/demos/use_cases/orchestrating_agents/docker-compose.yaml b/demos/use_cases/orchestrating_agents/docker-compose.yaml
deleted file mode 100644
index 288ecf30..00000000
--- a/demos/use_cases/orchestrating_agents/docker-compose.yaml
+++ /dev/null
@@ -1,29 +0,0 @@
-services:
-  triage_service:
-    build:
-      context: ./
-    environment:
-      - OLTP_HOST=http://jaeger:4317
-    extra_hosts:
-      - "host.docker.internal:host-gateway"
-    ports:
-      - "18083:80"
-
-  chatbot_ui:
-    build:
-      context: ../../shared/chatbot_ui
-    ports:
-      - "18080:8080"
-    environment:
-      # this is only because we are running the sample app in the same docker container environemtn as archgw
-      - CHAT_COMPLETION_ENDPOINT=http://host.docker.internal:10000/v1
-    extra_hosts:
-      - "host.docker.internal:host-gateway"
-
-  jaeger:
-    build:
-      context: ../../shared/jaeger
-    ports:
-      - "16686:16686"
-      - "4317:4317"
-      - "4318:4318"
diff --git a/demos/use_cases/orchestrating_agents/hurl_tests/simple_issues_repairs.hurl b/demos/use_cases/orchestrating_agents/hurl_tests/simple_issues_repairs.hurl
deleted file mode 100644
index 3820de57..00000000
--- a/demos/use_cases/orchestrating_agents/hurl_tests/simple_issues_repairs.hurl
+++ /dev/null
@@ -1,19 +0,0 @@
-POST http://localhost:10000/v1/chat/completions
-Content-Type: application/json
-
-{
-  "messages": [
-    {
-      "role": "user",
-      "content": "I bought a package recently and it not working properly"
-    }
-  ]
-}
-HTTP 200
-[Asserts]
-header "content-type" == "application/json"
-jsonpath "$.model" matches /^gpt-4o-2/
-jsonpath "$.metadata.x-arch-state" != null
-jsonpath "$.usage" != null
-jsonpath "$.choices[0].message.content" != null
-jsonpath "$.choices[0].message.role" == "assistant"
diff --git a/demos/use_cases/orchestrating_agents/hurl_tests/simple_sale_agent.hurl b/demos/use_cases/orchestrating_agents/hurl_tests/simple_sale_agent.hurl
deleted file mode 100644
index 4db2c67c..00000000
--- a/demos/use_cases/orchestrating_agents/hurl_tests/simple_sale_agent.hurl
+++ /dev/null
@@ -1,19 +0,0 @@
-POST http://localhost:10000/v1/chat/completions
-Content-Type: application/json
-
-{
-  "messages": [
-    {
-      "role": "user",
-      "content": "I want to sell red shoes"
-    }
-  ]
-}
-HTTP 200
-[Asserts]
-header "content-type" == "application/json"
-jsonpath "$.model" matches /^gpt-4o-mini/
-jsonpath "$.metadata.x-arch-state" != null
-jsonpath "$.usage" != null
-jsonpath "$.choices[0].message.content" != null
-jsonpath "$.choices[0].message.role" == "assistant"
diff --git a/demos/use_cases/orchestrating_agents/hurl_tests/simple_stream.hurl b/demos/use_cases/orchestrating_agents/hurl_tests/simple_stream.hurl
deleted file mode 100644
index f060fed0..00000000
--- a/demos/use_cases/orchestrating_agents/hurl_tests/simple_stream.hurl
+++ /dev/null
@@ -1,16 +0,0 @@
-POST http://localhost:10000/v1/chat/completions
-Content-Type: application/json
-
-{
-  "messages": [
-    {
-      "role": "user",
-      "content": "I want to sell red shoes"
-    }
-  ],
-  "stream": true
-}
-HTTP 200
-[Asserts]
-header "content-type" matches /text\/event-stream/
-body matches /^data: .*?sales_agent.*?\n/
diff --git a/demos/use_cases/orchestrating_agents/main.py b/demos/use_cases/orchestrating_agents/main.py
deleted file mode 100644
index 4f288e50..00000000
--- a/demos/use_cases/orchestrating_agents/main.py
+++ /dev/null
@@ -1,115 +0,0 @@
-import logging
-import json
-from typing import List, Dict, Any
-from fastapi import FastAPI, Request
-from fastapi.responses import StreamingResponse
-from pydantic import BaseModel
-import openai
-
-# Setup logging
-logging.basicConfig(level=logging.INFO)
-logger = logging.getLogger("uvicorn.error")
-
-app = FastAPI()
-
-
-class Message(BaseModel):
-    role: str
-    content: str
-
-
-class ChatCompletionsRequest(BaseModel):
-    messages: List[Message]
-    model: str
-    metadata: Dict[str, Any] = {}
-    stream: bool = False
-
-
-openai_client = openai.OpenAI(
-    api_key="None",  # archgw picks the API key from the config file
-    base_url="http://host.docker.internal:12000/v1",
-)
-
-
-def call_openai(messages: List[Dict[str, str]], stream: bool, model: str):
-    logger.info(f"llm agent model: {model}")
-    completion = openai_client.chat.completions.create(
-        model=model,
-        messages=messages,
-        stream=stream,
-    )
-
-    if stream:
-
-        def stream():
-            for line in completion:
-                if line.choices and len(line.choices) > 0 and line.choices[0].delta:
-                    chunk_response_str = json.dumps(line.model_dump())
-                    yield "data: " + chunk_response_str + "\n\n"
-            yield "data: [DONE]" + "\n\n"
-
-        return StreamingResponse(stream(), media_type="text/event-stream")
-    else:
-        return completion
-
-
-class Agent:
-    def __init__(self, role: str, instructions: str, model: str = ""):
-        self.model = model
-        self.system_prompt = f"You are a {role}.\n{instructions}"
-
-    def handle(self, req: ChatCompletionsRequest):
-        messages = [{"role": "system", "content": self.get_system_prompt()}] + [
-            message.model_dump() for message in req.messages
-        ]
-
-        model = req.model
-        if self.model:
-            model = self.model
-        return call_openai(messages, req.stream, model)
-
-    def get_system_prompt(self) -> str:
-        return self.system_prompt
-
-
-# Define your agents
-AGENTS = {
-    "sales_agent": Agent(
-        role="sales agent",
-        instructions=(
-            "Always answer in a sentence or less.\n"
-            "Follow the following routine with the user:\n"
-            "1. Engage\n"
-            "2. Quote ridiculous price\n"
-            "3. Reveal caveat if user agrees."
-        ),
-        model="gpt-4o-mini",
-    ),
-    "issues_and_repairs": Agent(
-        role="issues and repairs agent",
-        instructions="Propose a solution, offer refund if necessary.",
-        model="gpt-4o",
-    ),
-    "escalate_to_human": Agent(
-        role="human escalation agent",
-        instructions="Escalate issues to a human.",
-        # skipping model name here as arch gateway will pick the default model from the config file
-    ),
-    "unknown_agent": Agent(
-        role="general assistant", instructions="Assist the user in general queries."
-    ),
-}
-
-
-@app.post("/v1/chat/completions")
-def completion_api(req: ChatCompletionsRequest, request: Request):
-    agent_name = req.metadata.get("agent-name", "unknown_agent")
-    agent = AGENTS.get(agent_name)
-    logger.info(f"Routing to agent: {agent_name}")
-
-    return agent.handle(req)
-
-
-@app.get("/healthz")
-async def healthz():
-    return {"status": "ok"}
diff --git a/demos/use_cases/orchestrating_agents/poetry.lock b/demos/use_cases/orchestrating_agents/poetry.lock
deleted file mode 100644
index 929a5c86..00000000
--- a/demos/use_cases/orchestrating_agents/poetry.lock
+++ /dev/null
@@ -1,573 +0,0 @@
-# This file is automatically @generated by Poetry 1.8.3 and should not be changed by hand.
-
-[[package]]
-name = "annotated-types"
-version = "0.7.0"
-description = "Reusable constraint types to use with typing.Annotated"
-optional = false
-python-versions = ">=3.8"
-files = [
-    {file = "annotated_types-0.7.0-py3-none-any.whl", hash = "sha256:1f02e8b43a8fbbc3f3e0d4f0f4bfc8131bcb4eebe8849b8e5c773f3a1c582a53"},
-    {file = "annotated_types-0.7.0.tar.gz", hash = "sha256:aff07c09a53a08bc8cfccb9c85b05f1aa9a2a6f23728d790723543408344ce89"},
-]
-
-[[package]]
-name = "anyio"
-version = "4.9.0"
-description = "High level compatibility layer for multiple asynchronous event loop implementations"
-optional = false
-python-versions = ">=3.9"
-files = [
-    {file = "anyio-4.9.0-py3-none-any.whl", hash = "sha256:9f76d541cad6e36af7beb62e978876f3b41e3e04f2c1fbf0884604c0a9c4d93c"},
-    {file = "anyio-4.9.0.tar.gz", hash = "sha256:673c0c244e15788651a4ff38710fea9675823028a6f08a5eda409e0c9840a028"},
-]
-
-[package.dependencies]
-exceptiongroup = {version = ">=1.0.2", markers = "python_version < \"3.11\""}
-idna = ">=2.8"
-sniffio = ">=1.1"
-typing_extensions = {version = ">=4.5", markers = "python_version < \"3.13\""}
-
-[package.extras]
-doc = ["Sphinx (>=8.2,<9.0)", "packaging", "sphinx-autodoc-typehints (>=1.2.0)", "sphinx_rtd_theme"]
-test = ["anyio[trio]", "blockbuster (>=1.5.23)", "coverage[toml] (>=7)", "exceptiongroup (>=1.2.0)", "hypothesis (>=4.0)", "psutil (>=5.9)", "pytest (>=7.0)", "trustme", "truststore (>=0.9.1)", "uvloop (>=0.21)"]
-trio = ["trio (>=0.26.1)"]
-
-[[package]]
-name = "certifi"
-version = "2025.1.31"
-description = "Python package for providing Mozilla's CA Bundle."
-optional = false
-python-versions = ">=3.6"
-files = [
-    {file = "certifi-2025.1.31-py3-none-any.whl", hash = "sha256:ca78db4565a652026a4db2bcdf68f2fb589ea80d0be70e03929ed730746b84fe"},
-    {file = "certifi-2025.1.31.tar.gz", hash = "sha256:3d5da6925056f6f18f119200434a4780a94263f10d1c21d032a6f6b2baa20651"},
-]
-
-[[package]]
-name = "click"
-version = "8.1.8"
-description = "Composable command line interface toolkit"
-optional = false
-python-versions = ">=3.7"
-files = [
-    {file = "click-8.1.8-py3-none-any.whl", hash = "sha256:63c132bbbed01578a06712a2d1f497bb62d9c1c0d329b7903a866228027263b2"},
-    {file = "click-8.1.8.tar.gz", hash = "sha256:ed53c9d8990d83c2a27deae68e4ee337473f6330c040a31d4225c9574d16096a"},
-]
-
-[package.dependencies]
-colorama = {version = "*", markers = "platform_system == \"Windows\""}
-
-[[package]]
-name = "colorama"
-version = "0.4.6"
-description = "Cross-platform colored terminal text."
-optional = false
-python-versions = "!=3.0.*,!=3.1.*,!=3.2.*,!=3.3.*,!=3.4.*,!=3.5.*,!=3.6.*,>=2.7"
-files = [
-    {file = "colorama-0.4.6-py2.py3-none-any.whl", hash = "sha256:4f1d9991f5acc0ca119f9d443620b77f9d6b33703e51011c16baf57afb285fc6"},
-    {file = "colorama-0.4.6.tar.gz", hash = "sha256:08695f5cb7ed6e0531a20572697297273c47b8cae5a63ffc6d6ed5c201be6e44"},
-]
-
-[[package]]
-name = "distro"
-version = "1.9.0"
-description = "Distro - an OS platform information API"
-optional = false
-python-versions = ">=3.6"
-files = [
-    {file = "distro-1.9.0-py3-none-any.whl", hash = "sha256:7bffd925d65168f85027d8da9af6bddab658135b840670a223589bc0c8ef02b2"},
-    {file = "distro-1.9.0.tar.gz", hash = "sha256:2fa77c6fd8940f116ee1d6b94a2f90b13b5ea8d019b98bc8bafdcabcdd9bdbed"},
-]
-
-[[package]]
-name = "exceptiongroup"
-version = "1.2.2"
-description = "Backport of PEP 654 (exception groups)"
-optional = false
-python-versions = ">=3.7"
-files = [
-    {file = "exceptiongroup-1.2.2-py3-none-any.whl", hash = "sha256:3111b9d131c238bec2f8f516e123e14ba243563fb135d3fe885990585aa7795b"},
-    {file = "exceptiongroup-1.2.2.tar.gz", hash = "sha256:47c2edf7c6738fafb49fd34290706d1a1a2f4d1c6df275526b62cbb4aa5393cc"},
-]
-
-[package.extras]
-test = ["pytest (>=6)"]
-
-[[package]]
-name = "fastapi"
-version = "0.115.11"
-description = "FastAPI framework, high performance, easy to learn, fast to code, ready for production"
-optional = false
-python-versions = ">=3.8"
-files = [
-    {file = "fastapi-0.115.11-py3-none-any.whl", hash = "sha256:32e1541b7b74602e4ef4a0260ecaf3aadf9d4f19590bba3e1bf2ac4666aa2c64"},
-    {file = "fastapi-0.115.11.tar.gz", hash = "sha256:cc81f03f688678b92600a65a5e618b93592c65005db37157147204d8924bf94f"},
-]
-
-[package.dependencies]
-pydantic = ">=1.7.4,<1.8 || >1.8,<1.8.1 || >1.8.1,<2.0.0 || >2.0.0,<2.0.1 || >2.0.1,<2.1.0 || >2.1.0,<3.0.0"
-starlette = ">=0.40.0,<0.47.0"
-typing-extensions = ">=4.8.0"
-
-[package.extras]
-all = ["email-validator (>=2.0.0)", "fastapi-cli[standard] (>=0.0.5)", "httpx (>=0.23.0)", "itsdangerous (>=1.1.0)", "jinja2 (>=3.1.5)", "orjson (>=3.2.1)", "pydantic-extra-types (>=2.0.0)", "pydantic-settings (>=2.0.0)", "python-multipart (>=0.0.18)", "pyyaml (>=5.3.1)", "ujson (>=4.0.1,!=4.0.2,!=4.1.0,!=4.2.0,!=4.3.0,!=5.0.0,!=5.1.0)", "uvicorn[standard] (>=0.12.0)"]
-standard = ["email-validator (>=2.0.0)", "fastapi-cli[standard] (>=0.0.5)", "httpx (>=0.23.0)", "jinja2 (>=3.1.5)", "python-multipart (>=0.0.18)", "uvicorn[standard] (>=0.12.0)"]
-
-[[package]]
-name = "h11"
-version = "0.14.0"
-description = "A pure-Python, bring-your-own-I/O implementation of HTTP/1.1"
-optional = false
-python-versions = ">=3.7"
-files = [
-    {file = "h11-0.14.0-py3-none-any.whl", hash = "sha256:e3fe4ac4b851c468cc8363d500db52c2ead036020723024a109d37346efaa761"},
-    {file = "h11-0.14.0.tar.gz", hash = "sha256:8f19fbbe99e72420ff35c00b27a34cb9937e902a8b810e2c88300c6f0a3b699d"},
-]
-
-[[package]]
-name = "httpcore"
-version = "1.0.7"
-description = "A minimal low-level HTTP client."
-optional = false
-python-versions = ">=3.8"
-files = [
-    {file = "httpcore-1.0.7-py3-none-any.whl", hash = "sha256:a3fff8f43dc260d5bd363d9f9cf1830fa3a458b332856f34282de498ed420edd"},
-    {file = "httpcore-1.0.7.tar.gz", hash = "sha256:8551cb62a169ec7162ac7be8d4817d561f60e08eaa485234898414bb5a8a0b4c"},
-]
-
-[package.dependencies]
-certifi = "*"
-h11 = ">=0.13,<0.15"
-
-[package.extras]
-asyncio = ["anyio (>=4.0,<5.0)"]
-http2 = ["h2 (>=3,<5)"]
-socks = ["socksio (==1.*)"]
-trio = ["trio (>=0.22.0,<1.0)"]
-
-[[package]]
-name = "httpx"
-version = "0.28.1"
-description = "The next generation HTTP client."
-optional = false
-python-versions = ">=3.8"
-files = [
-    {file = "httpx-0.28.1-py3-none-any.whl", hash = "sha256:d909fcccc110f8c7faf814ca82a9a4d816bc5a6dbfea25d6591d6985b8ba59ad"},
-    {file = "httpx-0.28.1.tar.gz", hash = "sha256:75e98c5f16b0f35b567856f597f06ff2270a374470a5c2392242528e3e3e42fc"},
-]
-
-[package.dependencies]
-anyio = "*"
-certifi = "*"
-httpcore = "==1.*"
-idna = "*"
-
-[package.extras]
-brotli = ["brotli", "brotlicffi"]
-cli = ["click (==8.*)", "pygments (==2.*)", "rich (>=10,<14)"]
-http2 = ["h2 (>=3,<5)"]
-socks = ["socksio (==1.*)"]
-zstd = ["zstandard (>=0.18.0)"]
-
-[[package]]
-name = "idna"
-version = "3.10"
-description = "Internationalized Domain Names in Applications (IDNA)"
-optional = false
-python-versions = ">=3.6"
-files = [
-    {file = "idna-3.10-py3-none-any.whl", hash = "sha256:946d195a0d259cbba61165e88e65941f16e9b36ea6ddb97f00452bae8b1287d3"},
-    {file = "idna-3.10.tar.gz", hash = "sha256:12f65c9b470abda6dc35cf8e63cc574b1c52b11df2c86030af0ac09b01b13ea9"},
-]
-
-[package.extras]
-all = ["flake8 (>=7.1.1)", "mypy (>=1.11.2)", "pytest (>=8.3.2)", "ruff (>=0.6.2)"]
-
-[[package]]
-name = "jiter"
-version = "0.9.0"
-description = "Fast iterable JSON parser."
-optional = false
-python-versions = ">=3.8"
-files = [
-    {file = "jiter-0.9.0-cp310-cp310-macosx_10_12_x86_64.whl", hash = "sha256:816ec9b60fdfd1fec87da1d7ed46c66c44ffec37ab2ef7de5b147b2fce3fd5ad"},
-    {file = "jiter-0.9.0-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:9b1d3086f8a3ee0194ecf2008cf81286a5c3e540d977fa038ff23576c023c0ea"},
-    {file = "jiter-0.9.0-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:1339f839b91ae30b37c409bf16ccd3dc453e8b8c3ed4bd1d6a567193651a4a51"},
-    {file = "jiter-0.9.0-cp310-cp310-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:ffba79584b3b670fefae66ceb3a28822365d25b7bf811e030609a3d5b876f538"},
-    {file = "jiter-0.9.0-cp310-cp310-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:5cfc7d0a8e899089d11f065e289cb5b2daf3d82fbe028f49b20d7b809193958d"},
-    {file = "jiter-0.9.0-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:e00a1a2bbfaaf237e13c3d1592356eab3e9015d7efd59359ac8b51eb56390a12"},
-    {file = "jiter-0.9.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:d1d9870561eb26b11448854dce0ff27a9a27cb616b632468cafc938de25e9e51"},
-    {file = "jiter-0.9.0-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:9872aeff3f21e437651df378cb75aeb7043e5297261222b6441a620218b58708"},
-    {file = "jiter-0.9.0-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:1fd19112d1049bdd47f17bfbb44a2c0001061312dcf0e72765bfa8abd4aa30e5"},
-    {file = "jiter-0.9.0-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:6ef5da104664e526836070e4a23b5f68dec1cc673b60bf1edb1bfbe8a55d0678"},
-    {file = "jiter-0.9.0-cp310-cp310-win32.whl", hash = "sha256:cb12e6d65ebbefe5518de819f3eda53b73187b7089040b2d17f5b39001ff31c4"},
-    {file = "jiter-0.9.0-cp310-cp310-win_amd64.whl", hash = "sha256:c43ca669493626d8672be3b645dbb406ef25af3f4b6384cfd306da7eb2e70322"},
-    {file = "jiter-0.9.0-cp311-cp311-macosx_10_12_x86_64.whl", hash = "sha256:6c4d99c71508912a7e556d631768dcdef43648a93660670986916b297f1c54af"},
-    {file = "jiter-0.9.0-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:8f60fb8ce7df529812bf6c625635a19d27f30806885139e367af93f6e734ef58"},
-    {file = "jiter-0.9.0-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:51c4e1a4f8ea84d98b7b98912aa4290ac3d1eabfde8e3c34541fae30e9d1f08b"},
-    {file = "jiter-0.9.0-cp311-cp311-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:5f4c677c424dc76684fea3e7285a7a2a7493424bea89ac441045e6a1fb1d7b3b"},
-    {file = "jiter-0.9.0-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:2221176dfec87f3470b21e6abca056e6b04ce9bff72315cb0b243ca9e835a4b5"},
-    {file = "jiter-0.9.0-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:3c7adb66f899ffa25e3c92bfcb593391ee1947dbdd6a9a970e0d7e713237d572"},
-    {file = "jiter-0.9.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:c98d27330fdfb77913c1097a7aab07f38ff2259048949f499c9901700789ac15"},
-    {file = "jiter-0.9.0-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:eda3f8cc74df66892b1d06b5d41a71670c22d95a1ca2cbab73654745ce9d0419"},
-    {file = "jiter-0.9.0-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:dd5ab5ddc11418dce28343123644a100f487eaccf1de27a459ab36d6cca31043"},
-    {file = "jiter-0.9.0-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:42f8a68a69f047b310319ef8e2f52fdb2e7976fb3313ef27df495cf77bcad965"},
-    {file = "jiter-0.9.0-cp311-cp311-win32.whl", hash = "sha256:a25519efb78a42254d59326ee417d6f5161b06f5da827d94cf521fed961b1ff2"},
-    {file = "jiter-0.9.0-cp311-cp311-win_amd64.whl", hash = "sha256:923b54afdd697dfd00d368b7ccad008cccfeb1efb4e621f32860c75e9f25edbd"},
-    {file = "jiter-0.9.0-cp312-cp312-macosx_10_12_x86_64.whl", hash = "sha256:7b46249cfd6c48da28f89eb0be3f52d6fdb40ab88e2c66804f546674e539ec11"},
-    {file = "jiter-0.9.0-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:609cf3c78852f1189894383cf0b0b977665f54cb38788e3e6b941fa6d982c00e"},
-    {file = "jiter-0.9.0-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:d726a3890a54561e55a9c5faea1f7655eda7f105bd165067575ace6e65f80bb2"},
-    {file = "jiter-0.9.0-cp312-cp312-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:2e89dc075c1fef8fa9be219e249f14040270dbc507df4215c324a1839522ea75"},
-    {file = "jiter-0.9.0-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:04e8ffa3c353b1bc4134f96f167a2082494351e42888dfcf06e944f2729cbe1d"},
-    {file = "jiter-0.9.0-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:203f28a72a05ae0e129b3ed1f75f56bc419d5f91dfacd057519a8bd137b00c42"},
-    {file = "jiter-0.9.0-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:fca1a02ad60ec30bb230f65bc01f611c8608b02d269f998bc29cca8619a919dc"},
-    {file = "jiter-0.9.0-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:237e5cee4d5d2659aaf91bbf8ec45052cc217d9446070699441a91b386ae27dc"},
-    {file = "jiter-0.9.0-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:528b6b71745e7326eed73c53d4aa57e2a522242320b6f7d65b9c5af83cf49b6e"},
-    {file = "jiter-0.9.0-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:9f48e86b57bc711eb5acdfd12b6cb580a59cc9a993f6e7dcb6d8b50522dcd50d"},
-    {file = "jiter-0.9.0-cp312-cp312-win32.whl", hash = "sha256:699edfde481e191d81f9cf6d2211debbfe4bd92f06410e7637dffb8dd5dfde06"},
-    {file = "jiter-0.9.0-cp312-cp312-win_amd64.whl", hash = "sha256:099500d07b43f61d8bd780466d429c45a7b25411b334c60ca875fa775f68ccb0"},
-    {file = "jiter-0.9.0-cp313-cp313-macosx_10_12_x86_64.whl", hash = "sha256:2764891d3f3e8b18dce2cff24949153ee30c9239da7c00f032511091ba688ff7"},
-    {file = "jiter-0.9.0-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:387b22fbfd7a62418d5212b4638026d01723761c75c1c8232a8b8c37c2f1003b"},
-    {file = "jiter-0.9.0-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:40d8da8629ccae3606c61d9184970423655fb4e33d03330bcdfe52d234d32f69"},
-    {file = "jiter-0.9.0-cp313-cp313-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:a1be73d8982bdc278b7b9377426a4b44ceb5c7952073dd7488e4ae96b88e1103"},
-    {file = "jiter-0.9.0-cp313-cp313-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:2228eaaaa111ec54b9e89f7481bffb3972e9059301a878d085b2b449fbbde635"},
-    {file = "jiter-0.9.0-cp313-cp313-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:11509bfecbc319459647d4ac3fd391d26fdf530dad00c13c4dadabf5b81f01a4"},
-    {file = "jiter-0.9.0-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:3f22238da568be8bbd8e0650e12feeb2cfea15eda4f9fc271d3b362a4fa0604d"},
-    {file = "jiter-0.9.0-cp313-cp313-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:17f5d55eb856597607562257c8e36c42bc87f16bef52ef7129b7da11afc779f3"},
-    {file = "jiter-0.9.0-cp313-cp313-musllinux_1_1_aarch64.whl", hash = "sha256:6a99bed9fbb02f5bed416d137944419a69aa4c423e44189bc49718859ea83bc5"},
-    {file = "jiter-0.9.0-cp313-cp313-musllinux_1_1_x86_64.whl", hash = "sha256:e057adb0cd1bd39606100be0eafe742de2de88c79df632955b9ab53a086b3c8d"},
-    {file = "jiter-0.9.0-cp313-cp313-win32.whl", hash = "sha256:f7e6850991f3940f62d387ccfa54d1a92bd4bb9f89690b53aea36b4364bcab53"},
-    {file = "jiter-0.9.0-cp313-cp313-win_amd64.whl", hash = "sha256:c8ae3bf27cd1ac5e6e8b7a27487bf3ab5f82318211ec2e1346a5b058756361f7"},
-    {file = "jiter-0.9.0-cp313-cp313t-macosx_11_0_arm64.whl", hash = "sha256:f0b2827fb88dda2cbecbbc3e596ef08d69bda06c6f57930aec8e79505dc17001"},
-    {file = "jiter-0.9.0-cp313-cp313t-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:062b756ceb1d40b0b28f326cba26cfd575a4918415b036464a52f08632731e5a"},
-    {file = "jiter-0.9.0-cp313-cp313t-win_amd64.whl", hash = "sha256:6f7838bc467ab7e8ef9f387bd6de195c43bad82a569c1699cb822f6609dd4cdf"},
-    {file = "jiter-0.9.0-cp38-cp38-macosx_10_12_x86_64.whl", hash = "sha256:4a2d16360d0642cd68236f931b85fe50288834c383492e4279d9f1792e309571"},
-    {file = "jiter-0.9.0-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:e84ed1c9c9ec10bbb8c37f450077cbe3c0d4e8c2b19f0a49a60ac7ace73c7452"},
-    {file = "jiter-0.9.0-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:9f3c848209ccd1bfa344a1240763975ca917de753c7875c77ec3034f4151d06c"},
-    {file = "jiter-0.9.0-cp38-cp38-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:7825f46e50646bee937e0f849d14ef3a417910966136f59cd1eb848b8b5bb3e4"},
-    {file = "jiter-0.9.0-cp38-cp38-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:d82a811928b26d1a6311a886b2566f68ccf2b23cf3bfed042e18686f1f22c2d7"},
-    {file = "jiter-0.9.0-cp38-cp38-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:0c058ecb51763a67f019ae423b1cbe3fa90f7ee6280c31a1baa6ccc0c0e2d06e"},
-    {file = "jiter-0.9.0-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:9897115ad716c48f0120c1f0c4efae348ec47037319a6c63b2d7838bb53aaef4"},
-    {file = "jiter-0.9.0-cp38-cp38-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:351f4c90a24c4fb8c87c6a73af2944c440494ed2bea2094feecacb75c50398ae"},
-    {file = "jiter-0.9.0-cp38-cp38-musllinux_1_1_aarch64.whl", hash = "sha256:d45807b0f236c485e1e525e2ce3a854807dfe28ccf0d013dd4a563395e28008a"},
-    {file = "jiter-0.9.0-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:1537a890724ba00fdba21787010ac6f24dad47f763410e9e1093277913592784"},
-    {file = "jiter-0.9.0-cp38-cp38-win32.whl", hash = "sha256:e3630ec20cbeaddd4b65513fa3857e1b7c4190d4481ef07fb63d0fad59033321"},
-    {file = "jiter-0.9.0-cp38-cp38-win_amd64.whl", hash = "sha256:2685f44bf80e95f8910553bf2d33b9c87bf25fceae6e9f0c1355f75d2922b0ee"},
-    {file = "jiter-0.9.0-cp39-cp39-macosx_10_12_x86_64.whl", hash = "sha256:9ef340fae98065071ccd5805fe81c99c8f80484e820e40043689cf97fb66b3e2"},
-    {file = "jiter-0.9.0-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:efb767d92c63b2cd9ec9f24feeb48f49574a713870ec87e9ba0c2c6e9329c3e2"},
-    {file = "jiter-0.9.0-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:113f30f87fb1f412510c6d7ed13e91422cfd329436364a690c34c8b8bd880c42"},
-    {file = "jiter-0.9.0-cp39-cp39-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:8793b6df019b988526f5a633fdc7456ea75e4a79bd8396a3373c371fc59f5c9b"},
-    {file = "jiter-0.9.0-cp39-cp39-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:7a9aaa5102dba4e079bb728076fadd5a2dca94c05c04ce68004cfd96f128ea34"},
-    {file = "jiter-0.9.0-cp39-cp39-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:d838650f6ebaf4ccadfb04522463e74a4c378d7e667e0eb1865cfe3990bfac49"},
-    {file = "jiter-0.9.0-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:c0194f813efdf4b8865ad5f5c5f50f8566df7d770a82c51ef593d09e0b347020"},
-    {file = "jiter-0.9.0-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:a7954a401d0a8a0b8bc669199db78af435aae1e3569187c2939c477c53cb6a0a"},
-    {file = "jiter-0.9.0-cp39-cp39-musllinux_1_1_aarch64.whl", hash = "sha256:4feafe787eb8a8d98168ab15637ca2577f6ddf77ac6c8c66242c2d028aa5420e"},
-    {file = "jiter-0.9.0-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:27cd1f2e8bb377f31d3190b34e4328d280325ad7ef55c6ac9abde72f79e84d2e"},
-    {file = "jiter-0.9.0-cp39-cp39-win32.whl", hash = "sha256:161d461dcbe658cf0bd0aa375b30a968b087cdddc624fc585f3867c63c6eca95"},
-    {file = "jiter-0.9.0-cp39-cp39-win_amd64.whl", hash = "sha256:e8b36d8a16a61993be33e75126ad3d8aa29cf450b09576f3c427d27647fcb4aa"},
-    {file = "jiter-0.9.0.tar.gz", hash = "sha256:aadba0964deb424daa24492abc3d229c60c4a31bfee205aedbf1acc7639d7893"},
-]
-
-[[package]]
-name = "openai"
-version = "1.66.5"
-description = "The official Python library for the openai API"
-optional = false
-python-versions = ">=3.8"
-files = [
-    {file = "openai-1.66.5-py3-none-any.whl", hash = "sha256:74be528175f8389f67675830c51a15bd51e874425c86d3de6153bf70ed6c2884"},
-    {file = "openai-1.66.5.tar.gz", hash = "sha256:f61b8fac29490ca8fdc6d996aa6926c18dbe5639536f8c40219c40db05511b11"},
-]
-
-[package.dependencies]
-anyio = ">=3.5.0,<5"
-distro = ">=1.7.0,<2"
-httpx = ">=0.23.0,<1"
-jiter = ">=0.4.0,<1"
-pydantic = ">=1.9.0,<3"
-sniffio = "*"
-tqdm = ">4"
-typing-extensions = ">=4.11,<5"
-
-[package.extras]
-datalib = ["numpy (>=1)", "pandas (>=1.2.3)", "pandas-stubs (>=1.1.0.11)"]
-realtime = ["websockets (>=13,<15)"]
-
-[[package]]
-name = "pydantic"
-version = "2.10.6"
-description = "Data validation using Python type hints"
-optional = false
-python-versions = ">=3.8"
-files = [
-    {file = "pydantic-2.10.6-py3-none-any.whl", hash = "sha256:427d664bf0b8a2b34ff5dd0f5a18df00591adcee7198fbd71981054cef37b584"},
-    {file = "pydantic-2.10.6.tar.gz", hash = "sha256:ca5daa827cce33de7a42be142548b0096bf05a7e7b365aebfa5f8eeec7128236"},
-]
-
-[package.dependencies]
-annotated-types = ">=0.6.0"
-pydantic-core = "2.27.2"
-typing-extensions = ">=4.12.2"
-
-[package.extras]
-email = ["email-validator (>=2.0.0)"]
-timezone = ["tzdata"]
-
-[[package]]
-name = "pydantic-core"
-version = "2.27.2"
-description = "Core functionality for Pydantic validation and serialization"
-optional = false
-python-versions = ">=3.8"
-files = [
-    {file = "pydantic_core-2.27.2-cp310-cp310-macosx_10_12_x86_64.whl", hash = "sha256:2d367ca20b2f14095a8f4fa1210f5a7b78b8a20009ecced6b12818f455b1e9fa"},
-    {file = "pydantic_core-2.27.2-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:491a2b73db93fab69731eaee494f320faa4e093dbed776be1a829c2eb222c34c"},
-    {file = "pydantic_core-2.27.2-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:7969e133a6f183be60e9f6f56bfae753585680f3b7307a8e555a948d443cc05a"},
-    {file = "pydantic_core-2.27.2-cp310-cp310-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:3de9961f2a346257caf0aa508a4da705467f53778e9ef6fe744c038119737ef5"},
-    {file = "pydantic_core-2.27.2-cp310-cp310-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:e2bb4d3e5873c37bb3dd58714d4cd0b0e6238cebc4177ac8fe878f8b3aa8e74c"},
-    {file = "pydantic_core-2.27.2-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:280d219beebb0752699480fe8f1dc61ab6615c2046d76b7ab7ee38858de0a4e7"},
-    {file = "pydantic_core-2.27.2-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:47956ae78b6422cbd46f772f1746799cbb862de838fd8d1fbd34a82e05b0983a"},
-    {file = "pydantic_core-2.27.2-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:14d4a5c49d2f009d62a2a7140d3064f686d17a5d1a268bc641954ba181880236"},
-    {file = "pydantic_core-2.27.2-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:337b443af21d488716f8d0b6164de833e788aa6bd7e3a39c005febc1284f4962"},
-    {file = "pydantic_core-2.27.2-cp310-cp310-musllinux_1_1_armv7l.whl", hash = "sha256:03d0f86ea3184a12f41a2d23f7ccb79cdb5a18e06993f8a45baa8dfec746f0e9"},
-    {file = "pydantic_core-2.27.2-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:7041c36f5680c6e0f08d922aed302e98b3745d97fe1589db0a3eebf6624523af"},
-    {file = "pydantic_core-2.27.2-cp310-cp310-win32.whl", hash = "sha256:50a68f3e3819077be2c98110c1f9dcb3817e93f267ba80a2c05bb4f8799e2ff4"},
-    {file = "pydantic_core-2.27.2-cp310-cp310-win_amd64.whl", hash = "sha256:e0fd26b16394ead34a424eecf8a31a1f5137094cabe84a1bcb10fa6ba39d3d31"},
-    {file = "pydantic_core-2.27.2-cp311-cp311-macosx_10_12_x86_64.whl", hash = "sha256:8e10c99ef58cfdf2a66fc15d66b16c4a04f62bca39db589ae8cba08bc55331bc"},
-    {file = "pydantic_core-2.27.2-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:26f32e0adf166a84d0cb63be85c562ca8a6fa8de28e5f0d92250c6b7e9e2aff7"},
-    {file = "pydantic_core-2.27.2-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:8c19d1ea0673cd13cc2f872f6c9ab42acc4e4f492a7ca9d3795ce2b112dd7e15"},
-    {file = "pydantic_core-2.27.2-cp311-cp311-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:5e68c4446fe0810e959cdff46ab0a41ce2f2c86d227d96dc3847af0ba7def306"},
-    {file = "pydantic_core-2.27.2-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:d9640b0059ff4f14d1f37321b94061c6db164fbe49b334b31643e0528d100d99"},
-    {file = "pydantic_core-2.27.2-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:40d02e7d45c9f8af700f3452f329ead92da4c5f4317ca9b896de7ce7199ea459"},
-    {file = "pydantic_core-2.27.2-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:1c1fd185014191700554795c99b347d64f2bb637966c4cfc16998a0ca700d048"},
-    {file = "pydantic_core-2.27.2-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:d81d2068e1c1228a565af076598f9e7451712700b673de8f502f0334f281387d"},
-    {file = "pydantic_core-2.27.2-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:1a4207639fb02ec2dbb76227d7c751a20b1a6b4bc52850568e52260cae64ca3b"},
-    {file = "pydantic_core-2.27.2-cp311-cp311-musllinux_1_1_armv7l.whl", hash = "sha256:3de3ce3c9ddc8bbd88f6e0e304dea0e66d843ec9de1b0042b0911c1663ffd474"},
-    {file = "pydantic_core-2.27.2-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:30c5f68ded0c36466acede341551106821043e9afaad516adfb6e8fa80a4e6a6"},
-    {file = "pydantic_core-2.27.2-cp311-cp311-win32.whl", hash = "sha256:c70c26d2c99f78b125a3459f8afe1aed4d9687c24fd677c6a4436bc042e50d6c"},
-    {file = "pydantic_core-2.27.2-cp311-cp311-win_amd64.whl", hash = "sha256:08e125dbdc505fa69ca7d9c499639ab6407cfa909214d500897d02afb816e7cc"},
-    {file = "pydantic_core-2.27.2-cp311-cp311-win_arm64.whl", hash = "sha256:26f0d68d4b235a2bae0c3fc585c585b4ecc51382db0e3ba402a22cbc440915e4"},
-    {file = "pydantic_core-2.27.2-cp312-cp312-macosx_10_12_x86_64.whl", hash = "sha256:9e0c8cfefa0ef83b4da9588448b6d8d2a2bf1a53c3f1ae5fca39eb3061e2f0b0"},
-    {file = "pydantic_core-2.27.2-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:83097677b8e3bd7eaa6775720ec8e0405f1575015a463285a92bfdfe254529ef"},
-    {file = "pydantic_core-2.27.2-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:172fce187655fece0c90d90a678424b013f8fbb0ca8b036ac266749c09438cb7"},
-    {file = "pydantic_core-2.27.2-cp312-cp312-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:519f29f5213271eeeeb3093f662ba2fd512b91c5f188f3bb7b27bc5973816934"},
-    {file = "pydantic_core-2.27.2-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:05e3a55d124407fffba0dd6b0c0cd056d10e983ceb4e5dbd10dda135c31071d6"},
-    {file = "pydantic_core-2.27.2-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:9c3ed807c7b91de05e63930188f19e921d1fe90de6b4f5cd43ee7fcc3525cb8c"},
-    {file = "pydantic_core-2.27.2-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:6fb4aadc0b9a0c063206846d603b92030eb6f03069151a625667f982887153e2"},
-    {file = "pydantic_core-2.27.2-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:28ccb213807e037460326424ceb8b5245acb88f32f3d2777427476e1b32c48c4"},
-    {file = "pydantic_core-2.27.2-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:de3cd1899e2c279b140adde9357c4495ed9d47131b4a4eaff9052f23398076b3"},
-    {file = "pydantic_core-2.27.2-cp312-cp312-musllinux_1_1_armv7l.whl", hash = "sha256:220f892729375e2d736b97d0e51466252ad84c51857d4d15f5e9692f9ef12be4"},
-    {file = "pydantic_core-2.27.2-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:a0fcd29cd6b4e74fe8ddd2c90330fd8edf2e30cb52acda47f06dd615ae72da57"},
-    {file = "pydantic_core-2.27.2-cp312-cp312-win32.whl", hash = "sha256:1e2cb691ed9834cd6a8be61228471d0a503731abfb42f82458ff27be7b2186fc"},
-    {file = "pydantic_core-2.27.2-cp312-cp312-win_amd64.whl", hash = "sha256:cc3f1a99a4f4f9dd1de4fe0312c114e740b5ddead65bb4102884b384c15d8bc9"},
-    {file = "pydantic_core-2.27.2-cp312-cp312-win_arm64.whl", hash = "sha256:3911ac9284cd8a1792d3cb26a2da18f3ca26c6908cc434a18f730dc0db7bfa3b"},
-    {file = "pydantic_core-2.27.2-cp313-cp313-macosx_10_12_x86_64.whl", hash = "sha256:7d14bd329640e63852364c306f4d23eb744e0f8193148d4044dd3dacdaacbd8b"},
-    {file = "pydantic_core-2.27.2-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:82f91663004eb8ed30ff478d77c4d1179b3563df6cdb15c0817cd1cdaf34d154"},
-    {file = "pydantic_core-2.27.2-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:71b24c7d61131bb83df10cc7e687433609963a944ccf45190cfc21e0887b08c9"},
-    {file = "pydantic_core-2.27.2-cp313-cp313-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:fa8e459d4954f608fa26116118bb67f56b93b209c39b008277ace29937453dc9"},
-    {file = "pydantic_core-2.27.2-cp313-cp313-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:ce8918cbebc8da707ba805b7fd0b382816858728ae7fe19a942080c24e5b7cd1"},
-    {file = "pydantic_core-2.27.2-cp313-cp313-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:eda3f5c2a021bbc5d976107bb302e0131351c2ba54343f8a496dc8783d3d3a6a"},
-    {file = "pydantic_core-2.27.2-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:bd8086fa684c4775c27f03f062cbb9eaa6e17f064307e86b21b9e0abc9c0f02e"},
-    {file = "pydantic_core-2.27.2-cp313-cp313-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:8d9b3388db186ba0c099a6d20f0604a44eabdeef1777ddd94786cdae158729e4"},
-    {file = "pydantic_core-2.27.2-cp313-cp313-musllinux_1_1_aarch64.whl", hash = "sha256:7a66efda2387de898c8f38c0cf7f14fca0b51a8ef0b24bfea5849f1b3c95af27"},
-    {file = "pydantic_core-2.27.2-cp313-cp313-musllinux_1_1_armv7l.whl", hash = "sha256:18a101c168e4e092ab40dbc2503bdc0f62010e95d292b27827871dc85450d7ee"},
-    {file = "pydantic_core-2.27.2-cp313-cp313-musllinux_1_1_x86_64.whl", hash = "sha256:ba5dd002f88b78a4215ed2f8ddbdf85e8513382820ba15ad5ad8955ce0ca19a1"},
-    {file = "pydantic_core-2.27.2-cp313-cp313-win32.whl", hash = "sha256:1ebaf1d0481914d004a573394f4be3a7616334be70261007e47c2a6fe7e50130"},
-    {file = "pydantic_core-2.27.2-cp313-cp313-win_amd64.whl", hash = "sha256:953101387ecf2f5652883208769a79e48db18c6df442568a0b5ccd8c2723abee"},
-    {file = "pydantic_core-2.27.2-cp313-cp313-win_arm64.whl", hash = "sha256:ac4dbfd1691affb8f48c2c13241a2e3b60ff23247cbcf981759c768b6633cf8b"},
-    {file = "pydantic_core-2.27.2-cp38-cp38-macosx_10_12_x86_64.whl", hash = "sha256:d3e8d504bdd3f10835468f29008d72fc8359d95c9c415ce6e767203db6127506"},
-    {file = "pydantic_core-2.27.2-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:521eb9b7f036c9b6187f0b47318ab0d7ca14bd87f776240b90b21c1f4f149320"},
-    {file = "pydantic_core-2.27.2-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:85210c4d99a0114f5a9481b44560d7d1e35e32cc5634c656bc48e590b669b145"},
-    {file = "pydantic_core-2.27.2-cp38-cp38-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:d716e2e30c6f140d7560ef1538953a5cd1a87264c737643d481f2779fc247fe1"},
-    {file = "pydantic_core-2.27.2-cp38-cp38-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:f66d89ba397d92f840f8654756196d93804278457b5fbede59598a1f9f90b228"},
-    {file = "pydantic_core-2.27.2-cp38-cp38-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:669e193c1c576a58f132e3158f9dfa9662969edb1a250c54d8fa52590045f046"},
-    {file = "pydantic_core-2.27.2-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:9fdbe7629b996647b99c01b37f11170a57ae675375b14b8c13b8518b8320ced5"},
-    {file = "pydantic_core-2.27.2-cp38-cp38-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:d262606bf386a5ba0b0af3b97f37c83d7011439e3dc1a9298f21efb292e42f1a"},
-    {file = "pydantic_core-2.27.2-cp38-cp38-musllinux_1_1_aarch64.whl", hash = "sha256:cabb9bcb7e0d97f74df8646f34fc76fbf793b7f6dc2438517d7a9e50eee4f14d"},
-    {file = "pydantic_core-2.27.2-cp38-cp38-musllinux_1_1_armv7l.whl", hash = "sha256:d2d63f1215638d28221f664596b1ccb3944f6e25dd18cd3b86b0a4c408d5ebb9"},
-    {file = "pydantic_core-2.27.2-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:bca101c00bff0adb45a833f8451b9105d9df18accb8743b08107d7ada14bd7da"},
-    {file = "pydantic_core-2.27.2-cp38-cp38-win32.whl", hash = "sha256:f6f8e111843bbb0dee4cb6594cdc73e79b3329b526037ec242a3e49012495b3b"},
-    {file = "pydantic_core-2.27.2-cp38-cp38-win_amd64.whl", hash = "sha256:fd1aea04935a508f62e0d0ef1f5ae968774a32afc306fb8545e06f5ff5cdf3ad"},
-    {file = "pydantic_core-2.27.2-cp39-cp39-macosx_10_12_x86_64.whl", hash = "sha256:c10eb4f1659290b523af58fa7cffb452a61ad6ae5613404519aee4bfbf1df993"},
-    {file = "pydantic_core-2.27.2-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:ef592d4bad47296fb11f96cd7dc898b92e795032b4894dfb4076cfccd43a9308"},
-    {file = "pydantic_core-2.27.2-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:c61709a844acc6bf0b7dce7daae75195a10aac96a596ea1b776996414791ede4"},
-    {file = "pydantic_core-2.27.2-cp39-cp39-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:42c5f762659e47fdb7b16956c71598292f60a03aa92f8b6351504359dbdba6cf"},
-    {file = "pydantic_core-2.27.2-cp39-cp39-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:4c9775e339e42e79ec99c441d9730fccf07414af63eac2f0e48e08fd38a64d76"},
-    {file = "pydantic_core-2.27.2-cp39-cp39-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:57762139821c31847cfb2df63c12f725788bd9f04bc2fb392790959b8f70f118"},
-    {file = "pydantic_core-2.27.2-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:0d1e85068e818c73e048fe28cfc769040bb1f475524f4745a5dc621f75ac7630"},
-    {file = "pydantic_core-2.27.2-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:097830ed52fd9e427942ff3b9bc17fab52913b2f50f2880dc4a5611446606a54"},
-    {file = "pydantic_core-2.27.2-cp39-cp39-musllinux_1_1_aarch64.whl", hash = "sha256:044a50963a614ecfae59bb1eaf7ea7efc4bc62f49ed594e18fa1e5d953c40e9f"},
-    {file = "pydantic_core-2.27.2-cp39-cp39-musllinux_1_1_armv7l.whl", hash = "sha256:4e0b4220ba5b40d727c7f879eac379b822eee5d8fff418e9d3381ee45b3b0362"},
-    {file = "pydantic_core-2.27.2-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:5e4f4bb20d75e9325cc9696c6802657b58bc1dbbe3022f32cc2b2b632c3fbb96"},
-    {file = "pydantic_core-2.27.2-cp39-cp39-win32.whl", hash = "sha256:cca63613e90d001b9f2f9a9ceb276c308bfa2a43fafb75c8031c4f66039e8c6e"},
-    {file = "pydantic_core-2.27.2-cp39-cp39-win_amd64.whl", hash = "sha256:77d1bca19b0f7021b3a982e6f903dcd5b2b06076def36a652e3907f596e29f67"},
-    {file = "pydantic_core-2.27.2-pp310-pypy310_pp73-macosx_10_12_x86_64.whl", hash = "sha256:2bf14caea37e91198329b828eae1618c068dfb8ef17bb33287a7ad4b61ac314e"},
-    {file = "pydantic_core-2.27.2-pp310-pypy310_pp73-macosx_11_0_arm64.whl", hash = "sha256:b0cb791f5b45307caae8810c2023a184c74605ec3bcbb67d13846c28ff731ff8"},
-    {file = "pydantic_core-2.27.2-pp310-pypy310_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:688d3fd9fcb71f41c4c015c023d12a79d1c4c0732ec9eb35d96e3388a120dcf3"},
-    {file = "pydantic_core-2.27.2-pp310-pypy310_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:3d591580c34f4d731592f0e9fe40f9cc1b430d297eecc70b962e93c5c668f15f"},
-    {file = "pydantic_core-2.27.2-pp310-pypy310_pp73-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:82f986faf4e644ffc189a7f1aafc86e46ef70372bb153e7001e8afccc6e54133"},
-    {file = "pydantic_core-2.27.2-pp310-pypy310_pp73-musllinux_1_1_aarch64.whl", hash = "sha256:bec317a27290e2537f922639cafd54990551725fc844249e64c523301d0822fc"},
-    {file = "pydantic_core-2.27.2-pp310-pypy310_pp73-musllinux_1_1_armv7l.whl", hash = "sha256:0296abcb83a797db256b773f45773da397da75a08f5fcaef41f2044adec05f50"},
-    {file = "pydantic_core-2.27.2-pp310-pypy310_pp73-musllinux_1_1_x86_64.whl", hash = "sha256:0d75070718e369e452075a6017fbf187f788e17ed67a3abd47fa934d001863d9"},
-    {file = "pydantic_core-2.27.2-pp310-pypy310_pp73-win_amd64.whl", hash = "sha256:7e17b560be3c98a8e3aa66ce828bdebb9e9ac6ad5466fba92eb74c4c95cb1151"},
-    {file = "pydantic_core-2.27.2-pp39-pypy39_pp73-macosx_10_12_x86_64.whl", hash = "sha256:c33939a82924da9ed65dab5a65d427205a73181d8098e79b6b426bdf8ad4e656"},
-    {file = "pydantic_core-2.27.2-pp39-pypy39_pp73-macosx_11_0_arm64.whl", hash = "sha256:00bad2484fa6bda1e216e7345a798bd37c68fb2d97558edd584942aa41b7d278"},
-    {file = "pydantic_core-2.27.2-pp39-pypy39_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:c817e2b40aba42bac6f457498dacabc568c3b7a986fc9ba7c8d9d260b71485fb"},
-    {file = "pydantic_core-2.27.2-pp39-pypy39_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:251136cdad0cb722e93732cb45ca5299fb56e1344a833640bf93b2803f8d1bfd"},
-    {file = "pydantic_core-2.27.2-pp39-pypy39_pp73-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:d2088237af596f0a524d3afc39ab3b036e8adb054ee57cbb1dcf8e09da5b29cc"},
-    {file = "pydantic_core-2.27.2-pp39-pypy39_pp73-musllinux_1_1_aarch64.whl", hash = "sha256:d4041c0b966a84b4ae7a09832eb691a35aec90910cd2dbe7a208de59be77965b"},
-    {file = "pydantic_core-2.27.2-pp39-pypy39_pp73-musllinux_1_1_armv7l.whl", hash = "sha256:8083d4e875ebe0b864ffef72a4304827015cff328a1be6e22cc850753bfb122b"},
-    {file = "pydantic_core-2.27.2-pp39-pypy39_pp73-musllinux_1_1_x86_64.whl", hash = "sha256:f141ee28a0ad2123b6611b6ceff018039df17f32ada8b534e6aa039545a3efb2"},
-    {file = "pydantic_core-2.27.2-pp39-pypy39_pp73-win_amd64.whl", hash = "sha256:7d0c8399fcc1848491f00e0314bd59fb34a9c008761bcb422a057670c3f65e35"},
-    {file = "pydantic_core-2.27.2.tar.gz", hash = "sha256:eb026e5a4c1fee05726072337ff51d1efb6f59090b7da90d30ea58625b1ffb39"},
-]
-
-[package.dependencies]
-typing-extensions = ">=4.6.0,<4.7.0 || >4.7.0"
-
-[[package]]
-name = "pyyaml"
-version = "6.0.2"
-description = "YAML parser and emitter for Python"
-optional = false
-python-versions = ">=3.8"
-files = [
-    {file = "PyYAML-6.0.2-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:0a9a2848a5b7feac301353437eb7d5957887edbf81d56e903999a75a3d743086"},
-    {file = "PyYAML-6.0.2-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:29717114e51c84ddfba879543fb232a6ed60086602313ca38cce623c1d62cfbf"},
-    {file = "PyYAML-6.0.2-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:8824b5a04a04a047e72eea5cec3bc266db09e35de6bdfe34c9436ac5ee27d237"},
-    {file = "PyYAML-6.0.2-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:7c36280e6fb8385e520936c3cb3b8042851904eba0e58d277dca80a5cfed590b"},
-    {file = "PyYAML-6.0.2-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:ec031d5d2feb36d1d1a24380e4db6d43695f3748343d99434e6f5f9156aaa2ed"},
-    {file = "PyYAML-6.0.2-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:936d68689298c36b53b29f23c6dbb74de12b4ac12ca6cfe0e047bedceea56180"},
-    {file = "PyYAML-6.0.2-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:23502f431948090f597378482b4812b0caae32c22213aecf3b55325e049a6c68"},
-    {file = "PyYAML-6.0.2-cp310-cp310-win32.whl", hash = "sha256:2e99c6826ffa974fe6e27cdb5ed0021786b03fc98e5ee3c5bfe1fd5015f42b99"},
-    {file = "PyYAML-6.0.2-cp310-cp310-win_amd64.whl", hash = "sha256:a4d3091415f010369ae4ed1fc6b79def9416358877534caf6a0fdd2146c87a3e"},
-    {file = "PyYAML-6.0.2-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:cc1c1159b3d456576af7a3e4d1ba7e6924cb39de8f67111c735f6fc832082774"},
-    {file = "PyYAML-6.0.2-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:1e2120ef853f59c7419231f3bf4e7021f1b936f6ebd222406c3b60212205d2ee"},
-    {file = "PyYAML-6.0.2-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:5d225db5a45f21e78dd9358e58a98702a0302f2659a3c6cd320564b75b86f47c"},
-    {file = "PyYAML-6.0.2-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:5ac9328ec4831237bec75defaf839f7d4564be1e6b25ac710bd1a96321cc8317"},
-    {file = "PyYAML-6.0.2-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:3ad2a3decf9aaba3d29c8f537ac4b243e36bef957511b4766cb0057d32b0be85"},
-    {file = "PyYAML-6.0.2-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:ff3824dc5261f50c9b0dfb3be22b4567a6f938ccce4587b38952d85fd9e9afe4"},
-    {file = "PyYAML-6.0.2-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:797b4f722ffa07cc8d62053e4cff1486fa6dc094105d13fea7b1de7d8bf71c9e"},
-    {file = "PyYAML-6.0.2-cp311-cp311-win32.whl", hash = "sha256:11d8f3dd2b9c1207dcaf2ee0bbbfd5991f571186ec9cc78427ba5bd32afae4b5"},
-    {file = "PyYAML-6.0.2-cp311-cp311-win_amd64.whl", hash = "sha256:e10ce637b18caea04431ce14fabcf5c64a1c61ec9c56b071a4b7ca131ca52d44"},
-    {file = "PyYAML-6.0.2-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:c70c95198c015b85feafc136515252a261a84561b7b1d51e3384e0655ddf25ab"},
-    {file = "PyYAML-6.0.2-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:ce826d6ef20b1bc864f0a68340c8b3287705cae2f8b4b1d932177dcc76721725"},
-    {file = "PyYAML-6.0.2-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:1f71ea527786de97d1a0cc0eacd1defc0985dcf6b3f17bb77dcfc8c34bec4dc5"},
-    {file = "PyYAML-6.0.2-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:9b22676e8097e9e22e36d6b7bda33190d0d400f345f23d4065d48f4ca7ae0425"},
-    {file = "PyYAML-6.0.2-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:80bab7bfc629882493af4aa31a4cfa43a4c57c83813253626916b8c7ada83476"},
-    {file = "PyYAML-6.0.2-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:0833f8694549e586547b576dcfaba4a6b55b9e96098b36cdc7ebefe667dfed48"},
-    {file = "PyYAML-6.0.2-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:8b9c7197f7cb2738065c481a0461e50ad02f18c78cd75775628afb4d7137fb3b"},
-    {file = "PyYAML-6.0.2-cp312-cp312-win32.whl", hash = "sha256:ef6107725bd54b262d6dedcc2af448a266975032bc85ef0172c5f059da6325b4"},
-    {file = "PyYAML-6.0.2-cp312-cp312-win_amd64.whl", hash = "sha256:7e7401d0de89a9a855c839bc697c079a4af81cf878373abd7dc625847d25cbd8"},
-    {file = "PyYAML-6.0.2-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:efdca5630322a10774e8e98e1af481aad470dd62c3170801852d752aa7a783ba"},
-    {file = "PyYAML-6.0.2-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:50187695423ffe49e2deacb8cd10510bc361faac997de9efef88badc3bb9e2d1"},
-    {file = "PyYAML-6.0.2-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:0ffe8360bab4910ef1b9e87fb812d8bc0a308b0d0eef8c8f44e0254ab3b07133"},
-    {file = "PyYAML-6.0.2-cp313-cp313-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:17e311b6c678207928d649faa7cb0d7b4c26a0ba73d41e99c4fff6b6c3276484"},
-    {file = "PyYAML-6.0.2-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:70b189594dbe54f75ab3a1acec5f1e3faa7e8cf2f1e08d9b561cb41b845f69d5"},
-    {file = "PyYAML-6.0.2-cp313-cp313-musllinux_1_1_aarch64.whl", hash = "sha256:41e4e3953a79407c794916fa277a82531dd93aad34e29c2a514c2c0c5fe971cc"},
-    {file = "PyYAML-6.0.2-cp313-cp313-musllinux_1_1_x86_64.whl", hash = "sha256:68ccc6023a3400877818152ad9a1033e3db8625d899c72eacb5a668902e4d652"},
-    {file = "PyYAML-6.0.2-cp313-cp313-win32.whl", hash = "sha256:bc2fa7c6b47d6bc618dd7fb02ef6fdedb1090ec036abab80d4681424b84c1183"},
-    {file = "PyYAML-6.0.2-cp313-cp313-win_amd64.whl", hash = "sha256:8388ee1976c416731879ac16da0aff3f63b286ffdd57cdeb95f3f2e085687563"},
-    {file = "PyYAML-6.0.2-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:24471b829b3bf607e04e88d79542a9d48bb037c2267d7927a874e6c205ca7e9a"},
-    {file = "PyYAML-6.0.2-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:d7fded462629cfa4b685c5416b949ebad6cec74af5e2d42905d41e257e0869f5"},
-    {file = "PyYAML-6.0.2-cp38-cp38-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:d84a1718ee396f54f3a086ea0a66d8e552b2ab2017ef8b420e92edbc841c352d"},
-    {file = "PyYAML-6.0.2-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:9056c1ecd25795207ad294bcf39f2db3d845767be0ea6e6a34d856f006006083"},
-    {file = "PyYAML-6.0.2-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:82d09873e40955485746739bcb8b4586983670466c23382c19cffecbf1fd8706"},
-    {file = "PyYAML-6.0.2-cp38-cp38-win32.whl", hash = "sha256:43fa96a3ca0d6b1812e01ced1044a003533c47f6ee8aca31724f78e93ccc089a"},
-    {file = "PyYAML-6.0.2-cp38-cp38-win_amd64.whl", hash = "sha256:01179a4a8559ab5de078078f37e5c1a30d76bb88519906844fd7bdea1b7729ff"},
-    {file = "PyYAML-6.0.2-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:688ba32a1cffef67fd2e9398a2efebaea461578b0923624778664cc1c914db5d"},
-    {file = "PyYAML-6.0.2-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:a8786accb172bd8afb8be14490a16625cbc387036876ab6ba70912730faf8e1f"},
-    {file = "PyYAML-6.0.2-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:d8e03406cac8513435335dbab54c0d385e4a49e4945d2909a581c83647ca0290"},
-    {file = "PyYAML-6.0.2-cp39-cp39-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:f753120cb8181e736c57ef7636e83f31b9c0d1722c516f7e86cf15b7aa57ff12"},
-    {file = "PyYAML-6.0.2-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:3b1fdb9dc17f5a7677423d508ab4f243a726dea51fa5e70992e59a7411c89d19"},
-    {file = "PyYAML-6.0.2-cp39-cp39-musllinux_1_1_aarch64.whl", hash = "sha256:0b69e4ce7a131fe56b7e4d770c67429700908fc0752af059838b1cfb41960e4e"},
-    {file = "PyYAML-6.0.2-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:a9f8c2e67970f13b16084e04f134610fd1d374bf477b17ec1599185cf611d725"},
-    {file = "PyYAML-6.0.2-cp39-cp39-win32.whl", hash = "sha256:6395c297d42274772abc367baaa79683958044e5d3835486c16da75d2a694631"},
-    {file = "PyYAML-6.0.2-cp39-cp39-win_amd64.whl", hash = "sha256:39693e1f8320ae4f43943590b49779ffb98acb81f788220ea932a6b6c51004d8"},
-    {file = "pyyaml-6.0.2.tar.gz", hash = "sha256:d584d9ec91ad65861cc08d42e834324ef890a082e591037abe114850ff7bbc3e"},
-]
-
-[[package]]
-name = "sniffio"
-version = "1.3.1"
-description = "Sniff out which async library your code is running under"
-optional = false
-python-versions = ">=3.7"
-files = [
-    {file = "sniffio-1.3.1-py3-none-any.whl", hash = "sha256:2f6da418d1f1e0fddd844478f41680e794e6051915791a034ff65e5f100525a2"},
-    {file = "sniffio-1.3.1.tar.gz", hash = "sha256:f4324edc670a0f49750a81b895f35c3adb843cca46f0530f79fc1babb23789dc"},
-]
-
-[[package]]
-name = "starlette"
-version = "0.46.1"
-description = "The little ASGI library that shines."
-optional = false
-python-versions = ">=3.9"
-files = [
-    {file = "starlette-0.46.1-py3-none-any.whl", hash = "sha256:77c74ed9d2720138b25875133f3a2dae6d854af2ec37dceb56aef370c1d8a227"},
-    {file = "starlette-0.46.1.tar.gz", hash = "sha256:3c88d58ee4bd1bb807c0d1acb381838afc7752f9ddaec81bbe4383611d833230"},
-]
-
-[package.dependencies]
-anyio = ">=3.6.2,<5"
-
-[package.extras]
-full = ["httpx (>=0.27.0,<0.29.0)", "itsdangerous", "jinja2", "python-multipart (>=0.0.18)", "pyyaml"]
-
-[[package]]
-name = "tqdm"
-version = "4.67.1"
-description = "Fast, Extensible Progress Meter"
-optional = false
-python-versions = ">=3.7"
-files = [
-    {file = "tqdm-4.67.1-py3-none-any.whl", hash = "sha256:26445eca388f82e72884e0d580d5464cd801a3ea01e63e5601bdff9ba6a48de2"},
-    {file = "tqdm-4.67.1.tar.gz", hash = "sha256:f8aef9c52c08c13a65f30ea34f4e5aac3fd1a34959879d7e59e63027286627f2"},
-]
-
-[package.dependencies]
-colorama = {version = "*", markers = "platform_system == \"Windows\""}
-
-[package.extras]
-dev = ["nbval", "pytest (>=6)", "pytest-asyncio (>=0.24)", "pytest-cov", "pytest-timeout"]
-discord = ["requests"]
-notebook = ["ipywidgets (>=6)"]
-slack = ["slack-sdk"]
-telegram = ["requests"]
-
-[[package]]
-name = "typing-extensions"
-version = "4.12.2"
-description = "Backported and Experimental Type Hints for Python 3.8+"
-optional = false
-python-versions = ">=3.8"
-files = [
-    {file = "typing_extensions-4.12.2-py3-none-any.whl", hash = "sha256:04e5ca0351e0f3f85c6853954072df659d0d13fac324d0072316b67d7794700d"},
-    {file = "typing_extensions-4.12.2.tar.gz", hash = "sha256:1a7ead55c7e559dd4dee8856e3a88b41225abfe1ce8df57b7c13915fe121ffb8"},
-]
-
-[[package]]
-name = "uvicorn"
-version = "0.34.0"
-description = "The lightning-fast ASGI server."
-optional = false
-python-versions = ">=3.9"
-files = [
-    {file = "uvicorn-0.34.0-py3-none-any.whl", hash = "sha256:023dc038422502fa28a09c7a30bf2b6991512da7dcdb8fd35fe57cfc154126f4"},
-    {file = "uvicorn-0.34.0.tar.gz", hash = "sha256:404051050cd7e905de2c9a7e61790943440b3416f49cb409f965d9dcd0fa73e9"},
-]
-
-[package.dependencies]
-click = ">=7.0"
-h11 = ">=0.8"
-typing-extensions = {version = ">=4.0", markers = "python_version < \"3.11\""}
-
-[package.extras]
-standard = ["colorama (>=0.4)", "httptools (>=0.6.3)", "python-dotenv (>=0.13)", "pyyaml (>=5.1)", "uvloop (>=0.14.0,!=0.15.0,!=0.15.1)", "watchfiles (>=0.13)", "websockets (>=10.4)"]
-
-[metadata]
-lock-version = "2.0"
-python-versions = "^3.10"
-content-hash = "d005d82268b6f8c2a68b26c454bced5c34bf3c971c0cbfefde3fc0c45c675f55"
diff --git a/demos/use_cases/orchestrating_agents/pyproject.toml b/demos/use_cases/orchestrating_agents/pyproject.toml
deleted file mode 100644
index 7b422438..00000000
--- a/demos/use_cases/orchestrating_agents/pyproject.toml
+++ /dev/null
@@ -1,20 +0,0 @@
-[tool.poetry]
-name = "api-server"
-version = "0.1.0"
-description = ""
-authors = ["Adil Hafeez <info@katanemo.com>"]
-readme = "README.md"
-
-[tool.poetry.dependencies]
-python = "^3.10"
-fastapi = "^0.115.4"
-pyyaml = "^6.0.2"
-uvicorn = "^0.34.0"
-openai = "^1.66.5"
-
-[build-system]
-requires = ["poetry-core"]
-build-backend = "poetry.core.masonry.api"
-
-[tool.poetry.scripts]
-api-server = "api_server.main:app"
diff --git a/demos/use_cases/orchestrating_agents/run_demo.sh b/demos/use_cases/orchestrating_agents/run_demo.sh
deleted file mode 100644
index eb47dce6..00000000
--- a/demos/use_cases/orchestrating_agents/run_demo.sh
+++ /dev/null
@@ -1,47 +0,0 @@
-#!/bin/bash
-set -e
-
-# Function to start the demo
-start_demo() {
-  # Step 1: Check if .env file exists
-  if [ -f ".env" ]; then
-    echo ".env file already exists. Skipping creation."
-  else
-    # Step 2: Create `.env` file and set OpenAI key
-    if [ -z "$OPENAI_API_KEY" ]; then
-      echo "Error: OPENAI_API_KEY environment variable is not set for the demo."
-      exit 1
-    fi
-
-    echo "Creating .env file..."
-    echo "OPENAI_API_KEY=$OPENAI_API_KEY" > .env
-    echo ".env file created with OPENAI_API_KEY."
-  fi
-
-  # Step 3: Start Arch
-  echo "Starting Arch with arch_config.yaml..."
-  archgw up arch_config.yaml
-
-  # Step 4: Start developer services
-  echo "Starting Network Agent using Docker Compose..."
-  docker compose up -d  # Run in detached mode
-}
-
-# Function to stop the demo
-stop_demo() {
-  # Step 1: Stop Docker Compose services
-  echo "Stopping Network Agent using Docker Compose..."
-  docker compose down
-
-  # Step 2: Stop Arch
-  echo "Stopping Arch..."
-  archgw down
-}
-
-# Main script logic
-if [ "$1" == "down" ]; then
-  stop_demo
-else
-  # Default action is to bring the demo up
-  start_demo
-fi
diff --git a/docs/source/resources/includes/arch_config_full_reference_rendered.yaml b/docs/source/resources/includes/arch_config_full_reference_rendered.yaml
index 0594bde2..cc0d70f3 100644
--- a/docs/source/resources/includes/arch_config_full_reference_rendered.yaml
+++ b/docs/source/resources/includes/arch_config_full_reference_rendered.yaml
@@ -61,6 +61,9 @@ model_providers:
   port: 80
   protocol: http
   provider_interface: mistral
+- model: Arch-Function
+  name: arch-function
+  provider_interface: arch
 overrides:
   prompt_target_intent_matching_threshold: 0.6
 prompt_guards:
diff --git a/tests/archgw/test_prompt_gateway.py b/tests/archgw/test_prompt_gateway.py
index b207ebe0..0e1b4317 100644
--- a/tests/archgw/test_prompt_gateway.py
+++ b/tests/archgw/test_prompt_gateway.py
@@ -22,6 +22,28 @@ from common import (
 )
 
 
+def normalize_tool_call_arguments(tool_call):
+    """
+    Best-effort conversion of a tool call's 'arguments' from JSON string to dict.
+
+    According to OpenAI API spec, the 'arguments' field should be a JSON string,
+    but for easier testing we parse it here. The input dict is modified in place.
+
+    Args:
+        tool_call: A tool call dict that may have 'arguments' as either a string or dict
+
+    Returns:
+        The same tool_call object; 'arguments' stays a str if it is not valid JSON
+    """
+    if "arguments" in tool_call and isinstance(tool_call["arguments"], str):
+        try:
+            tool_call["arguments"] = json.loads(tool_call["arguments"])
+        except (json.JSONDecodeError, TypeError):
+            # Not valid JSON: leave the original string value untouched
+            pass
+    return tool_call
+
+
 def test_prompt_gateway(httpserver: HTTPServer):
     simple_fixture = TEST_CASE_FIXTURES["SIMPLE"]
     input = simple_fixture["input"]
@@ -67,7 +89,7 @@ def test_prompt_gateway(httpserver: HTTPServer):
     tool_calls_message = arch_messages[0]
     tool_calls = tool_calls_message.get("tool_calls", [])
     assert len(tool_calls) > 0
-    tool_call = tool_calls[0]["function"]
+    tool_call = normalize_tool_call_arguments(tool_calls[0]["function"])
     diff = DeepDiff(tool_call, expected_tool_call, ignore_string_case=True)
     assert not diff
 
diff --git a/tests/e2e/test_prompt_gateway.py b/tests/e2e/test_prompt_gateway.py
index 2edab55d..a55e740c 100644
--- a/tests/e2e/test_prompt_gateway.py
+++ b/tests/e2e/test_prompt_gateway.py
@@ -24,6 +24,28 @@ def cleanup_tool_call(tool_call):
     return tool_call.strip()
 
 
+def normalize_tool_call_arguments(tool_call):
+    """
+    Best-effort conversion of a tool call's 'arguments' from JSON string to dict.
+
+    According to OpenAI API spec, the 'arguments' field should be a JSON string,
+    but for easier testing we parse it here. The input dict is modified in place.
+
+    Args:
+        tool_call: A tool call dict that may have 'arguments' as either a string or dict
+
+    Returns:
+        The same tool_call object; 'arguments' stays a str if it is not valid JSON
+    """
+    if "arguments" in tool_call and isinstance(tool_call["arguments"], str):
+        try:
+            tool_call["arguments"] = json.loads(tool_call["arguments"])
+        except (json.JSONDecodeError, TypeError):
+            # Not valid JSON: leave the original string value untouched
+            pass
+    return tool_call
+
+
 @pytest.mark.parametrize("stream", [True, False])
 def test_prompt_gateway(stream):
     expected_tool_call = {
@@ -62,7 +84,7 @@ def test_prompt_gateway(stream):
         print("cleaned_tool_call_str: ", cleaned_tool_call_str)
         tool_calls = json.loads(cleaned_tool_call_str).get("tool_calls", [])
         assert len(tool_calls) > 0
-        tool_call = tool_calls[0]
+        tool_call = normalize_tool_call_arguments(tool_calls[0])
         location = tool_call["arguments"]["location"]
         assert expected_tool_call["arguments"]["location"] in location.lower()
         del expected_tool_call["arguments"]["location"]
@@ -106,7 +128,7 @@ def test_prompt_gateway(stream):
         print("cleaned_tool_call_json: ", json.dumps(cleaned_tool_call_json))
         tool_calls_list = cleaned_tool_call_json.get("tool_calls", [])
         assert len(tool_calls_list) > 0
-        tool_call = tool_calls_list[0]
+        tool_call = normalize_tool_call_arguments(tool_calls_list[0])
         location = tool_call["arguments"]["location"]
         assert expected_tool_call["arguments"]["location"] in location.lower()
         del expected_tool_call["arguments"]["location"]
@@ -241,7 +263,7 @@ def test_prompt_gateway_param_tool_call(stream):
         assert role == "assistant"
         tool_calls = choices[0].get("delta", {}).get("tool_calls", [])
         assert len(tool_calls) > 0
-        tool_call = tool_calls[0]["function"]
+        tool_call = normalize_tool_call_arguments(tool_calls[0]["function"])
         diff = DeepDiff(tool_call, expected_tool_call, ignore_string_case=True)
         assert not diff
 
@@ -275,7 +297,7 @@ def test_prompt_gateway_param_tool_call(stream):
         tool_calls_message = arch_messages[0]
         tool_calls = tool_calls_message.get("tool_calls", [])
         assert len(tool_calls) > 0
-        tool_call = tool_calls[0]["function"]
+        tool_call = normalize_tool_call_arguments(tool_calls[0]["function"])
         diff = DeepDiff(tool_call, expected_tool_call, ignore_string_case=True)
         assert not diff
 
diff --git a/tests/modelserver/test_hallucination.py b/tests/modelserver/test_hallucination.py
deleted file mode 100644
index 323db3fc..00000000
--- a/tests/modelserver/test_hallucination.py
+++ /dev/null
@@ -1,44 +0,0 @@
-import os
-import pytest
-import requests
-import logging
-import yaml
-
-pytestmark = pytest.mark.skip(
-    reason="Skipping entire test file as hallucination is not enabled for archfc 1.1 yet"
-)
-
-MODEL_SERVER_ENDPOINT = os.getenv(
-    "MODEL_SERVER_ENDPOINT", "http://localhost:51000/function_calling"
-)
-
-# Load test data from YAML file
-script_dir = os.path.dirname(__file__)
-
-# Construct the full path to the YAML file
-yaml_file_path = os.path.join(script_dir, "test_hallucination_data.yaml")
-
-# Load test data from YAML file
-with open(yaml_file_path, "r") as file:
-    test_data_yaml = yaml.safe_load(file)
-
-
-@pytest.mark.parametrize(
-    "test_data",
-    [
-        pytest.param(test_case, id=test_case["id"])
-        for test_case in test_data_yaml["test_cases"]
-    ],
-)
-def test_model_server(test_data):
-    input = test_data["input"]
-    expected = test_data["expected"]
-
-    response = requests.post(MODEL_SERVER_ENDPOINT, json=input)
-    assert response.status_code == 200
-    assert response.headers["content-type"] == "application/json"
-
-    response_json = response.json()
-    assert response_json
-    metadata = response_json.get("metadata", {})
-    assert (metadata["hallucination"].lower() == "true") == expected[0]["hallucination"]
diff --git a/tests/modelserver/test_hallucination_data.yaml b/tests/modelserver/test_hallucination_data.yaml
deleted file mode 100644
index 935a8f5f..00000000
--- a/tests/modelserver/test_hallucination_data.yaml
+++ /dev/null
@@ -1,257 +0,0 @@
-test_cases:
-  - id: "[WEATHER AGENT] - single turn, single tool, prompt prefilling"
-    input:
-      messages:
-        - role: "user"
-          content: "what is the weather forecast for seattle?"
-      tools:
-        - type: "function"
-          function:
-            name: "get_current_weather"
-            description: "Get current weather at a location."
-            parameters:
-              type: "object"
-              properties:
-                location:
-                  type: "string"
-                  description: "The location to get the weather for"
-                  format: "City, State"
-                days:
-                  type: "integer"
-                  description: "The number of days for the request."
-              required:
-                - location
-                - days
-    expected:
-      - type: "metadata"
-        hallucination: false
-
-  - id: "[WEATHER AGENT] - single turn, single tool, hallucination"
-    input:
-      messages:
-        - role: "user"
-          content: "what is the weather in Seattle in days?"
-      tools:
-        - type: "function"
-          function:
-            name: "get_current_weather"
-            description: "Get current weather at a location."
-            parameters:
-              type: "object"
-              properties:
-                location:
-                  type: "str"
-                  description: "The location to get the weather for"
-                  format: "City, State"
-                days:
-                  type: "int"
-                  description: "the number of days for the request."
-              required: ["location", "days"]
-    expected:
-    - type: "metadata"
-      hallucination: true
-
-  - id: "[WEATHER AGENT] - multi turn, single tool, all params passed"
-    input:
-      messages:
-        - role: "user"
-          content: "how is the weather in chicago for next 5 days?"
-        - role: "assistant"
-          content: "Can you tell me your location and how many days you want?"
-        - role: "user"
-          content: "Seattle"
-        - role: "assistant"
-          content: "Can you please provide me the days for the weather forecast?"
-        - role: "user"
-          content: "5 days"
-      tools:
-        - type: "function"
-          function:
-            name: "get_current_weather"
-            description: "Get current weather at a location."
-            parameters:
-              type: "object"
-              properties:
-                location:
-                  type: "str"
-                  description: "The location to get the weather for"
-                  format: "City, State"
-                days:
-                  type: "int"
-                  description: "the number of days for the request."
-              required: ["location", "days"]
-    expected:
-    - type: "metadata"
-      hallucination: false
-
-  - id: "[WEATHER AGENT] - multi turn, single tool, clarification"
-    input:
-      messages:
-        - role: "user"
-          content: "how is the weather for next 5 days?"
-        - role: "assistant"
-          content: "Can you tell me your location and how many days you want?"
-        - role: "user"
-          content: "Seattle"
-        - role: "assistant"
-          content: "Can you please provide me the days for the weather forecast?"
-        - role: "user"
-          content: "Sorry, the location is actually los angeles in 5 days"
-      tools:
-        - type: "function"
-          function:
-            name: "get_current_weather"
-            description: "Get current weather at a location."
-            parameters:
-              type: "object"
-              properties:
-                location:
-                  type: "str"
-                  description: "The location to get the weather for"
-                  format: "City, State"
-                days:
-                  type: "int"
-                  description: "the number of days for the request."
-              required: ["location", "days"]
-    expected:
-    - type: "metadata"
-      hallucination: false
-
-  - id: "[SALE AGENT] - single turn, single tool, hallucination region"
-    input:
-      messages:
-      - role: "user"
-        content: "get me sales opportunities of tech"
-      tools:
-        - type: "function"
-          function:
-            name: "sales_opportunity"
-            description: "Retrieve potential sales opportunities based for a particular industry type in a region."
-            parameters:
-              type: "object"
-              properties:
-                region:
-                  type: "str"
-                  description: "Geographical region to identify sales opportunities."
-                industry:
-                  type: "str"
-                  description: "Industry type."
-                max_results:
-                  type: "int"
-                  description: "Maximum number of sales opportunities to retrieve."
-                  default: 20
-              required: ["region", "industry"]
-    expected:
-    - type: "metadata"
-      hallucination: true
-
-  - id: "[SALE AGENT] - single turn, single tool, hallucination industry"
-    input:
-      messages:
-      - role: "user"
-        content: "get me sales opportunities in NA"
-      tools:
-        - type: "function"
-          function:
-            name: "sales_opportunity"
-            description: "Retrieve potential sales opportunities based for a particular industry type in a region."
-            parameters:
-              type: "object"
-              properties:
-                region:
-                  type: "str"
-                  description: "Geographical region to identify sales opportunities."
-                industry:
-                  type: "str"
-                  description: "Industry type."
-                max_results:
-                  type: "int"
-                  description: "Maximum number of sales opportunities to retrieve."
-                  default: 20
-              required: ["region", "industry"]
-    expected:
-    - type: "metadata"
-      hallucination: true
-
-  - id: "[PRODUCT AGENT] - single turn, single tool, hallucination industry"
-    input:
-      messages:
-      - role: "user"
-        content: "get me sales opportunities in NA"
-      tools:
-        - type: "function"
-          function:
-            name: "product_recommendation"
-            description: "Place an order for an iphone with user_id 195 and location is 1600 pensylvania ave"
-            parameters:
-              type: "object"
-              properties:
-                user_id:
-                  type: "str"
-                  description: "Unique identifier for the user."
-                category:
-                  type: "str"
-                  description: "Product category for recommendations."
-                max_results:
-                  type: "int"
-                  description: "Maximum number of recommended products to show."
-                  default: 10
-              required: ["user_id", "category"]
-        - type: "function"
-          function:
-            name: "place_order"
-            description: "Place and pay for an order for one or more products to ship to the an address."
-            parameters:
-              type: "object"
-              properties:
-                user_id:
-                  type: "str"
-                  description: "Unique identifier for the user placing the order."
-                product_ids:
-                  type: "array"
-                  description: "List of product IDs to include in the order."
-                shipping_address:
-                  type: "str"
-                  description: "Shipping address for the order."
-                payment_method:
-                  type: "str"
-                  description: "Payment method for the order."
-              required: ["user_id", "product_ids", "shipping_address", "payment_method"]
-        - type: "function"
-          function:
-            name: "sales_opportunity"
-            description: "Retrieve potential sales opportunities based for a particular industry type in a region."
-            parameters:
-              type: "object"
-              properties:
-                region:
-                  type: "str"
-                  description: "Geographical region to identify sales opportunities."
-                industry:
-                  type: "str"
-                  description: "Industry type."
-                max_results:
-                  type: "int"
-                  description: "Maximum number of sales opportunities to retrieve."
-                  default: 20
-              required: ["region", "industry"]
-        - type: "function"
-          function:
-            name: "query_database"
-            description: "Perform a database query to retrieve or update information."
-            parameters:
-              type: "object"
-              properties:
-                query:
-                  type: "str"
-                  description: "SQL query string to execute against the database."
-                parameters:
-                  type: "array"
-                  description: "List of parameters to safely inject into the SQL query (to prevent SQL injection)."
-                operation:
-                  type: "str"
-                  description: "Type of operation."
-              required: ["query", "operation"]
-    expected:
-    - type: "metadata"
-      hallucination: true
diff --git a/tests/modelserver/test_modelserver.py b/tests/modelserver/test_modelserver.py
index 4596606f..f18c803c 100644
--- a/tests/modelserver/test_modelserver.py
+++ b/tests/modelserver/test_modelserver.py
@@ -10,7 +10,7 @@ pytestmark = pytest.mark.skip(
 )
 
 MODEL_SERVER_ENDPOINT = os.getenv(
-    "MODEL_SERVER_ENDPOINT", "http://localhost:51000/function_calling"
+    "MODEL_SERVER_ENDPOINT", "http://localhost:12000/function_calling"
 )
 
 # Load test data from YAML file
diff --git a/tests/rest/api_model_server.rest b/tests/rest/api_model_server.rest
index 5fdbf968..9c094c19 100644
--- a/tests/rest/api_model_server.rest
+++ b/tests/rest/api_model_server.rest
@@ -1,4 +1,4 @@
-@model_server_endpoint = http://localhost:51000
+@model_server_endpoint = http://localhost:12000
 @archfc_endpoint = https://archfc.katanemo.dev
 
 ### talk to function calling endpoint
diff --git a/tests/rest/insurance_agent.rest b/tests/rest/insurance_agent.rest
index c45ebb85..f5a86f8f 100644
--- a/tests/rest/insurance_agent.rest
+++ b/tests/rest/insurance_agent.rest
@@ -1,4 +1,4 @@
-@model_server_endpoint = http://localhost:51000
+@model_server_endpoint = http://localhost:12000
 @archfc_endpoint = https://archfc.katanemo.dev
 
 ### multi turn conversation with intent, except parameter gathering
@@ -54,26 +54,8 @@ Content-Type: application/json
     }
   ]
 }
-### talk to Arch-Intent directly for completion
-POST https://archfc.katanemo.dev/v1/chat/completions HTTP/1.1
-Content-Type: application/json
-
-{
-  "model": "Arch-Intent",
-  "messages": [
-    {
-      "role": "system",
-      "content": "You are a helpful assistant.\n\nYou task is to check if there are any tools that can be used to help the last user message in conversations according to the available tools listed below.\n\n<tools>\n{\"index\": \"T0\", \"type\": \"function\", \"function\": {\"name\": \"weather_forecast\", \"parameters\": {\"type\": \"object\", \"properties\": {\"city\": {\"type\": \"str\"}, \"days\": {\"type\": \"int\"}}, \"required\": [\"city\", \"days\"]}}}\n</tools>\n\nProvide your tool assessment for ONLY THE LAST USER MESSAGE in the above conversation:\n- First line must read 'Yes' or 'No'.\n- If yes, a second line must include a comma-separated list of tool indexes.\n"
-    },
-    { "role": "user", "content": "hi" }
-  ],
-  "stream": false
-}
-
-
-
-### multi turn conversation with correct parameters
 
+### multi turn conversation with intent, expect parameter gathering
 POST {{model_server_endpoint}}/function_calling HTTP/1.1
 Content-Type: application/json
 
@@ -125,21 +107,6 @@ Content-Type: application/json
     }
   ]
 }
-### talk to Arch-Intent directly for completion, expect No
-POST https://archfc.katanemo.dev/v1/chat/completions HTTP/1.1
-Content-Type: application/json
-
-{
-  "model": "Arch-Intent",
-  "messages": [
-    {
-      "role": "system",
-      "content": "You are a helpful assistant.\n\nYou task is to check if there are any tools that can be used to help the last user message in conversations according to the available tools listed below.\n\n<tools>\n{\"index\": \"T0\", \"type\": \"function\", \"function\": {\"name\": \"weather_forecast\", \"parameters\": {\"type\": \"object\", \"properties\": {\"city\": {\"type\": \"str\"}, \"days\": {\"type\": \"int\"}}, \"required\": [\"city\", \"days\"]}}}\n</tools>\n\nProvide your tool assessment for ONLY THE LAST USER MESSAGE in the above conversation:\n- First line must read 'Yes' or 'No'.\n- If yes, a second line must include a comma-separated list of tool indexes.\n"
-    },
-    { "role": "user", "content": "what is your name" }
-  ],
-  "stream": false
-}
 
 ### multi turn conversation with correct parameters
 POST {{model_server_endpoint}}/function_calling HTTP/1.1
diff --git a/tests/rest/network_agent.rest b/tests/rest/network_agent.rest
index dc03fa6c..07f746ca 100644
--- a/tests/rest/network_agent.rest
+++ b/tests/rest/network_agent.rest
@@ -1,4 +1,4 @@
-@model_server_endpoint = http://localhost:51000
+@model_server_endpoint = http://localhost:12000
 @archfc_endpoint = https://archfc.katanemo.dev
 
 ### single turn function calling all parameters insurance agent summary