diff --git a/arch/arch_config_schema.yaml b/arch/arch_config_schema.yaml
index 0fe980dd..67073c79 100644
--- a/arch/arch_config_schema.yaml
+++ b/arch/arch_config_schema.yaml
@@ -51,11 +51,12 @@ properties:
           type: string
         default:
           type: boolean
+        endpoint:  # optional "host[:port]" of the provider; config_generator.py splits host and port on ":"
+          type: string
       additionalProperties: false
       required:
         - name
         - provider
-        - access_key
         - model
   overrides:
     type: object
diff --git a/arch/envoy.template.yaml b/arch/envoy.template.yaml
index 5eac257f..3e278c1c 100644
--- a/arch/envoy.template.yaml
+++ b/arch/envoy.template.yaml
@@ -538,6 +538,24 @@ static_resources:
               tls_maximum_protocol_version: TLSv1_3
       {% endif %}
 {% endfor %}
+
+{% for local_llm_provider in local_llms %}{# one upstream cluster per LLM provider that declares an endpoint; each entry must carry name, endpoint (host) and port #}
+    - name: {{ local_llm_provider.name }}
+      connect_timeout: 5s
+      type: LOGICAL_DNS
+      dns_lookup_family: V4_ONLY
+      lb_policy: ROUND_ROBIN
+      load_assignment:
+        cluster_name: {{ local_llm_provider.name }}
+        endpoints:
+          - lb_endpoints:
+              - endpoint:
+                  address:
+                    socket_address:
+                      address: {{ local_llm_provider.endpoint }}
+                      port_value: {{ local_llm_provider.port }}
+                  hostname: {{ local_llm_provider.endpoint }}
+{% endfor %}
     - name: arch_internal
       connect_timeout: 5s
       type: LOGICAL_DNS
diff --git a/arch/tools/cli/config_generator.py b/arch/tools/cli/config_generator.py
index 3393bb5c..5379e909 100644
--- a/arch/tools/cli/config_generator.py
+++ b/arch/tools/cli/config_generator.py
@@ -16,18 +16,6 @@ ARCH_CONFIG_SCHEMA_FILE = os.getenv(
 )
 
 
-def add_secret_key_to_llm_providers(config_yaml):
-    llm_providers = []
-    for llm_provider in config_yaml.get("llm_providers", []):
-        access_key_env_var = llm_provider.get("access_key", False)
-        access_key_value = os.getenv(access_key_env_var, False)
-        if access_key_env_var and access_key_value:
-            llm_provider["access_key"] = access_key_value
-        llm_providers.append(llm_provider)
-    config_yaml["llm_providers"] = llm_providers
-    return config_yaml
-
-
 def validate_and_render_schema():
     env = Environment(loader=FileSystemLoader("./"))
     template = env.get_template("envoy.template.yaml")
@@ -76,12 +64,23 @@ def validate_and_render_schema():
     config_yaml["mode"] = "llm"
     arch_llm_config_string = yaml.dump(config_yaml)
 
+    # Providers that declare an endpoint each get their own Envoy cluster.
+    llms_with_endpoint = []
+    for llm_provider in arch_llm_providers:
+        endpoint = llm_provider.get("endpoint")
+        if endpoint:
+            host, *rest = endpoint.split(":")
+            # port_value is always rendered by the template; default to 80.
+            llm_provider.update(endpoint=host, port=int(rest[0]) if rest else 80)
+            llms_with_endpoint.append(llm_provider)
+
     data = {
         "arch_config": arch_config_string,
         "arch_llm_config": arch_llm_config_string,
         "arch_clusters": inferred_clusters,
         "arch_llm_providers": arch_llm_providers,
         "arch_tracing": arch_tracing,
+        "local_llms": llms_with_endpoint,
     }
 
     rendered = template.render(data)
diff --git a/crates/common/src/configuration.rs b/crates/common/src/configuration.rs
index e83c1117..e196be21 100644
--- a/crates/common/src/configuration.rs
+++ b/crates/common/src/configuration.rs
@@ -171,6 +171,8 @@ pub struct LlmProvider {
     pub model: String,
     pub default: Option<bool>,
     pub stream: Option<bool>,
+    pub endpoint: Option<String>,
+    pub port: Option<u16>,
     pub rate_limits: Option<LlmRatelimit>,
 }
 
diff --git a/crates/llm_gateway/src/stream_context.rs b/crates/llm_gateway/src/stream_context.rs
index bb36816f..50f46ac2 100644
--- a/crates/llm_gateway/src/stream_context.rs
+++ b/crates/llm_gateway/src/stream_context.rs
@@ -177,7 +177,10 @@ impl HttpContext for StreamContext {
         self.add_http_request_header(ARCH_ROUTING_HEADER, &self.llm_provider().name);
 
         if let Err(error) = self.modify_auth_headers() {
-            self.send_server_error(error, Some(StatusCode::BAD_REQUEST));
+            // A provider with its own `endpoint` may lack an access key; only reject otherwise.
+            if self.llm_provider().endpoint.is_none() {
+                self.send_server_error(error, Some(StatusCode::BAD_REQUEST));
+            }
         }
         self.delete_content_length_header();
         self.save_ratelimit_header();
diff --git a/tests/archgw/arch_config.yaml b/tests/archgw/arch_config.yaml
index 3f450717..e9afa5c1 100644
--- a/tests/archgw/arch_config.yaml
+++ b/tests/archgw/arch_config.yaml
@@ -12,22 +12,13 @@ endpoints:
     connect_timeout: 0.005s
 
 llm_providers:
-  - name: gpt-4o-mini
-    access_key: $OPENAI_API_KEY
-    provider: openai
-    model: gpt-4o-mini
+
+  - name: local-llm
+    provider: local-llm
+    endpoint: host.docker.internal:51002  # host:port of the LLM server; no access_key is configured for this provider
+    model: test-local-model
     default: true
 
-  - name: gpt-3.5-turbo-0125
-    access_key: $OPENAI_API_KEY
-    provider: openai
-    model: gpt-3.5-turbo-0125
-
-  - name: gpt-4o
-    access_key: $OPENAI_API_KEY
-    provider: openai
-    model: gpt-4o
-
 system_prompt: |
   You are a helpful assistant.