feat: add passthrough_auth option for forwarding client Authorization header (#687)

* feat: add passthrough_auth option for forwarding client Authorization header
* fix tests
* Update comment to reflect upstream forwarding
* Apply suggestions from code review

---------

Co-authored-by: Adil Hafeez <adil.hafeez@gmail.com>
Co-authored-by: Adil Hafeez <adil@katanemo.com>
2026-07-11 16:12:13 +02:00 · 2026-01-15 00:06:28 +01:00 · 2026-01-15 00:06:28 +01:00 · 4d53297c17
commit 4d53297c17
parent ba1f783adf
8 changed files with 177 additions and 26 deletions
--- a/docs/source/concepts/llm_providers/supported_providers.rst
+++ b/docs/source/concepts/llm_providers/supported_providers.rst
@@ -728,6 +728,75 @@ Configure routing preferences for dynamic model selection:
          - name: creative_writing
            description: creative content generation, storytelling, and writing assistance

+.. _passthrough_auth:
+
+Passthrough Authentication
+~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+When deploying Plano in front of LLM proxy services that manage their own API key validation (such as LiteLLM, OpenRouter, or custom gateways), you may want to forward the client's original ``Authorization`` header instead of replacing it with a configured ``access_key``.
+
+The ``passthrough_auth`` option enables this behavior:
+
+.. code-block:: yaml
+
+    llm_providers:
+      # Forward the client's Authorization header to LiteLLM
+      - model: openai/gpt-4o-litellm
+        base_url: https://litellm.example.com
+        passthrough_auth: true
+        default: true
+
+      # Forward to OpenRouter
+      - model: openai/claude-3-opus
+        base_url: https://openrouter.ai/api/v1
+        passthrough_auth: true
+
+**How it works:**
+
+1. Client sends a request with ``Authorization: Bearer <virtual-key>``
+2. Plano preserves this header instead of replacing it with ``access_key``
+3. The upstream service (e.g., LiteLLM) validates the virtual key
+4. Response flows back through Plano to the client
+
+**Use Cases:**
+
+- **LiteLLM Integration**: Route requests to LiteLLM which manages virtual keys and rate limits
+- **OpenRouter**: Forward requests to OpenRouter with per-user API keys
+- **Custom API Gateways**: Integrate with internal gateways that have their own authentication
+- **Multi-tenant Deployments**: Allow different clients to use their own credentials
+
+**Important Notes:**
+
+- When ``passthrough_auth: true`` is set, the ``access_key`` field is ignored (a warning is logged if both are configured)
+- If the client doesn't provide an ``Authorization`` header, the request is forwarded without authentication (the upstream service will likely respond with ``401 Unauthorized``)
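+- A ``base_url`` pointing at the upstream proxy is typically required when using ``passthrough_auth``, since the request must reach the service that validates the client's key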
+
+**Example: configuration with LiteLLM**
+
+.. code-block:: yaml
+
+    # plano_config.yaml
+    version: v0.3.0
+
+    listeners:
+      - name: llm
+        type: model
+        port: 10000
+
+    model_providers:
+      - model: openai/gpt-4o
+        base_url: https://litellm.example.com
+        passthrough_auth: true
+        default: true
+
+.. code-block:: bash
+
+    # Client request - virtual key is forwarded to upstream
+    curl http://localhost:10000/v1/chat/completions \
+      -H "Authorization: Bearer sk-litellm-virtual-key-abc123" \
+      -H "Content-Type: application/json" \
+      -d '{"model": "gpt-4o", "messages": [{"role": "user", "content": "Hello"}]}'
+
 Model Selection Guidelines
 --------------------------

--- a/docs/source/resources/includes/arch_config_full_reference.yaml
+++ b/docs/source/resources/includes/arch_config_full_reference.yaml
@@ -1,26 +1,22 @@
-
 # Arch Gateway configuration version
 version: v0.3.0

-
 # External HTTP agents - API type is controlled by request path (/v1/responses, /v1/messages, /v1/chat/completions)
 agents:
-  - id: weather_agent  # Example agent for weather
+  - id: weather_agent # Example agent for weather
    url: http://host.docker.internal:10510

-  - id: flight_agent   # Example agent for flights
+  - id: flight_agent # Example agent for flights
    url: http://host.docker.internal:10520

-
 # MCP filters applied to requests/responses (e.g., input validation, query rewriting)
 filters:
-  - id: input_guards  # Example filter for input validation
+  - id: input_guards # Example filter for input validation
    url: http://host.docker.internal:10500
    # type: mcp (default)
    # transport: streamable-http (default)
    # tool: input_guards (default - same as filter id)

-
 # LLM provider configurations with API keys and model routing
 model_providers:
  - model: openai/gpt-4o
@@ -36,6 +32,12 @@ model_providers:
  - model: mistral/ministral-3b-latest
    access_key: $MISTRAL_API_KEY

+  # Example: Passthrough authentication for LiteLLM or similar proxies
+  # When passthrough_auth is true, the client's Authorization header is forwarded
+  # instead of using the configured access_key
+  - model: openai/gpt-4o-litellm
+    base_url: https://litellm.example.com
+    passthrough_auth: true

 # Model aliases - use friendly names instead of full provider model names
 model_aliases:
@@ -45,7 +47,6 @@ model_aliases:
  smart-llm:
    target: gpt-4o

-
 # HTTP listeners - entry points for agent routing, prompt targets, and direct LLM access
 listeners:
  # Agent listener for routing requests to multiple agents
@@ -73,7 +74,6 @@ listeners:
    port: 10000
    # This listener is used for prompt_targets and function calling

-
 # Reusable service endpoints
 endpoints:
  app_server:
@@ -83,7 +83,6 @@ endpoints:
  mistral_local:
    endpoint: 127.0.0.1:8001

-
 # Prompt targets for function calling and API orchestration
 prompt_targets:
  - name: get_current_weather
@@ -103,7 +102,6 @@ prompt_targets:
      path: /weather
      http_method: POST

-
 # OpenTelemetry tracing configuration
 tracing:
  # Random sampling percentage (1-100)
--- a/docs/source/resources/includes/arch_config_full_reference_rendered.yaml
+++ b/docs/source/resources/includes/arch_config_full_reference_rendered.yaml
@@ -64,6 +64,15 @@ listeners:
    model: ministral-3b-latest
    name: mistral/ministral-3b-latest
    provider_interface: mistral
+  - base_url: https://litellm.example.com
+    cluster_name: openai_litellm.example.com
+    endpoint: litellm.example.com
+    model: gpt-4o-litellm
+    name: openai/gpt-4o-litellm
+    passthrough_auth: true
+    port: 443
+    protocol: https
+    provider_interface: openai
  name: egress_traffic
  port: 12000
  timeout: 30s
@@ -91,6 +100,15 @@ model_providers:
  model: ministral-3b-latest
  name: mistral/ministral-3b-latest
  provider_interface: mistral
+- base_url: https://litellm.example.com
+  cluster_name: openai_litellm.example.com
+  endpoint: litellm.example.com
+  model: gpt-4o-litellm
+  name: openai/gpt-4o-litellm
+  passthrough_auth: true
+  port: 443
+  protocol: https
+  provider_interface: openai
 - internal: true
  model: Arch-Function
  name: arch-function