diff --git a/.buildinfo b/.buildinfo
index d2102ed1..a21e115d 100755
--- a/.buildinfo
+++ b/.buildinfo
@@ -1,4 +1,4 @@
# Sphinx build info version 1
# This file records the configuration used when building these files. When it is not found, a full rebuild will be done.
-config: 9db6364d8186d5eaae6b4148a1288a59
+config: d54d7379a33d5f6b3c1acac04a881e38
tags: 645f666f9bcd5a90fca523b33c5a78b7
diff --git a/CNAME b/CNAME
index 5a6f76d1..f7a19b2f 100644
--- a/CNAME
+++ b/CNAME
@@ -1 +1 @@
-docs.planoai.dev
\ No newline at end of file
+docs.planoai.dev
diff --git a/_downloads/ca9d3b7116524473d8adbde7cf15d167/arch_config_full_reference.yaml b/_downloads/ca9d3b7116524473d8adbde7cf15d167/arch_config_full_reference.yaml
index c9d5e4ff..aa186c26 100755
--- a/_downloads/ca9d3b7116524473d8adbde7cf15d167/arch_config_full_reference.yaml
+++ b/_downloads/ca9d3b7116524473d8adbde7cf15d167/arch_config_full_reference.yaml
@@ -1,100 +1,110 @@
-version: v0.1
+# Arch Gateway configuration version
+version: v0.3.0
+
+
+# External HTTP agents - API type is controlled by request path (/v1/responses, /v1/messages, /v1/chat/completions)
+agents:
+ - id: weather_agent # Example agent for weather
+ url: http://host.docker.internal:10510
+
+ - id: flight_agent # Example agent for flights
+ url: http://host.docker.internal:10520
+
+
+# MCP filters applied to requests/responses (e.g., input validation, query rewriting)
+filters:
+ - id: input_guards # Example filter for input validation
+ url: http://host.docker.internal:10500
+ # type: mcp (default)
+ # transport: streamable-http (default)
+ # tool: input_guards (default - same as filter id)
+
+
+# LLM provider configurations with API keys and model routing
+model_providers:
+ - model: openai/gpt-4o
+ access_key: $OPENAI_API_KEY
+ default: true
+
+ - model: openai/gpt-4o-mini
+ access_key: $OPENAI_API_KEY
+
+ - model: anthropic/claude-sonnet-4-0
+ access_key: $ANTHROPIC_API_KEY
+
+ - model: mistral/ministral-3b-latest
+ access_key: $MISTRAL_API_KEY
+
+
+# Model aliases - use friendly names instead of full provider model names
+model_aliases:
+ fast-llm:
+ target: gpt-4o-mini
+
+ smart-llm:
+ target: gpt-4o
+
+
+# HTTP listeners - entry points for agent routing, prompt targets, and direct LLM access
listeners:
- ingress_traffic:
+ # Agent listener for routing requests to multiple agents
+ - type: agent
+ name: travel_booking_service
+ port: 8001
+ router: plano_orchestrator_v1
address: 0.0.0.0
- port: 10000
- message_format: openai
- timeout: 5s
- egress_traffic:
+ agents:
+ - id: rag_agent
+ description: virtual assistant for retrieval augmented generation tasks
+ filter_chain:
+ - input_guards
+
+ # Model listener for direct LLM access
+ - type: model
+ name: model_1
address: 0.0.0.0
port: 12000
- message_format: openai
- timeout: 5s
-# Arch creates a round-robin load balancing between different endpoints, managed via the cluster subsystem.
+ # Prompt listener for function calling (for prompt_targets)
+ - type: prompt
+ name: prompt_function_listener
+ address: 0.0.0.0
+ port: 10000
+ # This listener is used for prompt_targets and function calling
+
+
+# Reusable service endpoints
endpoints:
app_server:
- # value could be ip address or a hostname with port
- # this could also be a list of endpoints for load balancing
- # for example endpoint: [ ip1:port, ip2:port ]
endpoint: 127.0.0.1:80
- # max time to wait for a connection to be established
connect_timeout: 0.005s
mistral_local:
endpoint: 127.0.0.1:8001
- error_target:
- endpoint: error_target_1
-
-# Centralized way to manage LLMs, manage keys, retry logic, failover and limits in a central way
-llm_providers:
- - name: openai/gpt-4o
- access_key: $OPENAI_API_KEY
- model: openai/gpt-4o
- default: true
-
- - access_key: $MISTRAL_API_KEY
- model: mistral/mistral-8x7b
-
- - model: mistral/mistral-7b-instruct
- base_url: http://mistral_local
-
-# Model aliases - friendly names that map to actual provider names
-model_aliases:
- # Alias for summarization tasks -> fast/cheap model
- arch.summarize.v1:
- target: gpt-4o
-
- # Alias for general purpose tasks -> latest model
- arch.v1:
- target: mistral-8x7b
-
-# provides a way to override default settings for the arch system
-overrides:
- # By default Arch uses an NLI + embedding approach to match an incoming prompt to a prompt target.
- # The intent matching threshold is kept at 0.80, you can override this behavior if you would like
- prompt_target_intent_matching_threshold: 0.60
-
-# default system prompt used by all prompt targets
-system_prompt: You are a network assistant that just offers facts; not advice on manufacturers or purchasing decisions.
-
-prompt_guards:
- input_guards:
- jailbreak:
- on_exception:
- message: Looks like you're curious about my abilities, but I can only provide assistance within my programmed parameters.
+# Prompt targets for function calling and API orchestration
prompt_targets:
- - name: information_extraction
- default: true
- description: handel all scenarios that are question and answer in nature. Like summarization, information extraction, etc.
- endpoint:
- name: app_server
- path: /agent/summary
- http_method: POST
- # Arch uses the default LLM and treats the response from the endpoint as the prompt to send to the LLM
- auto_llm_dispatch_on_response: true
- # override system prompt for this prompt target
- system_prompt: You are a helpful information extraction assistant. Use the information that is provided to you.
-
- - name: reboot_network_device
- description: Reboot a specific network device
- endpoint:
- name: app_server
- path: /agent/action
+ - name: get_current_weather
+ description: Get current weather at a location.
parameters:
- - name: device_id
- type: str
- description: Identifier of the network device to reboot.
+ - name: location
+ description: The location to get the weather for
required: true
- - name: confirmation
- type: bool
- description: Confirmation flag to proceed with reboot.
- default: false
- enum: [true, false]
+ type: string
+ format: City, State
+ - name: days
+ description: the number of days for the request
+ required: true
+ type: int
+ endpoint:
+ name: app_server
+ path: /weather
+ http_method: POST
+
+# OpenTelemetry tracing configuration
tracing:
- # sampling rate. Note by default Arch works on OpenTelemetry compatible tracing.
- sampling_rate: 0.1
+ # Random sampling percentage (1-100)
+ random_sampling: 100
diff --git a/_images/PlanoTagline.svg b/_images/PlanoTagline.svg
new file mode 100755
index 00000000..c0c10548
--- /dev/null
+++ b/_images/PlanoTagline.svg
@@ -0,0 +1,56 @@
+
diff --git a/_images/arch-logo.png b/_images/arch-logo.png
deleted file mode 100755
index bbffb318..00000000
Binary files a/_images/arch-logo.png and /dev/null differ
diff --git a/_images/arch-system-architecture.jpg b/_images/arch-system-architecture.jpg
deleted file mode 100755
index 3c8839a7..00000000
Binary files a/_images/arch-system-architecture.jpg and /dev/null differ
diff --git a/_images/arch_network_diagram_high_level.png b/_images/arch_network_diagram_high_level.png
deleted file mode 100755
index e83e7165..00000000
Binary files a/_images/arch_network_diagram_high_level.png and /dev/null differ
diff --git a/_images/function-calling-flow.jpg b/_images/function-calling-flow.jpg
deleted file mode 100755
index 9f0f4a59..00000000
Binary files a/_images/function-calling-flow.jpg and /dev/null differ
diff --git a/_images/network-topology-agent.jpg b/_images/network-topology-agent.jpg
deleted file mode 100755
index 50ba9a64..00000000
Binary files a/_images/network-topology-agent.jpg and /dev/null differ
diff --git a/_images/network-topology-ingress-egress.jpg b/_images/network-topology-ingress-egress.jpg
deleted file mode 100755
index 03e36e77..00000000
Binary files a/_images/network-topology-ingress-egress.jpg and /dev/null differ
diff --git a/_images/network-topology-ingress-egress.png b/_images/network-topology-ingress-egress.png
new file mode 100755
index 00000000..c2e55584
Binary files /dev/null and b/_images/network-topology-ingress-egress.png differ
diff --git a/_images/plano-system-architecture.png b/_images/plano-system-architecture.png
new file mode 100755
index 00000000..792477d5
Binary files /dev/null and b/_images/plano-system-architecture.png differ
diff --git a/_images/plano_network_diagram_high_level.png b/_images/plano_network_diagram_high_level.png
new file mode 100755
index 00000000..da1c5b92
Binary files /dev/null and b/_images/plano_network_diagram_high_level.png differ
diff --git a/_images/tracing.png b/_images/tracing.png
index 91d6a82b..bb34db91 100755
Binary files a/_images/tracing.png and b/_images/tracing.png differ
diff --git a/_static/css/custom.css b/_static/css/custom.css
new file mode 100755
index 00000000..b7ccb7aa
--- /dev/null
+++ b/_static/css/custom.css
@@ -0,0 +1,6 @@
+/* Prevent sphinxawesome-theme's Tailwind utility `dark:invert` from inverting the header logo. */
+.dark header img[alt="Logo"],
+.dark #left-sidebar img[alt="Logo"] {
+ --tw-invert: invert(0%) !important;
+ filter: none !important;
+}
diff --git a/_static/documentation_options.js b/_static/documentation_options.js
index 0ba5ec80..caaa7577 100755
--- a/_static/documentation_options.js
+++ b/_static/documentation_options.js
@@ -1,5 +1,5 @@
const DOCUMENTATION_OPTIONS = {
- VERSION: ' v0.3.22',
+ VERSION: ' v0.4',
LANGUAGE: 'en',
COLLAPSE_INDEX: false,
BUILDER: 'html',
diff --git a/_static/favicon.ico b/_static/favicon.ico
index 29dc5902..a1b75eb4 100755
Binary files a/_static/favicon.ico and b/_static/favicon.ico differ
diff --git a/_static/img/PlanoTagline.svg b/_static/img/PlanoTagline.svg
new file mode 100755
index 00000000..c0c10548
--- /dev/null
+++ b/_static/img/PlanoTagline.svg
@@ -0,0 +1,56 @@
+
diff --git a/_static/img/arch-logo.png b/_static/img/arch-logo.png
deleted file mode 100755
index bbffb318..00000000
Binary files a/_static/img/arch-logo.png and /dev/null differ
diff --git a/_static/img/arch-nav-logo.png b/_static/img/arch-nav-logo.png
deleted file mode 100755
index 5a1a7776..00000000
Binary files a/_static/img/arch-nav-logo.png and /dev/null differ
diff --git a/_static/img/arch-system-architecture.jpg b/_static/img/arch-system-architecture.jpg
deleted file mode 100755
index 3c8839a7..00000000
Binary files a/_static/img/arch-system-architecture.jpg and /dev/null differ
diff --git a/_static/img/arch_network_diagram_high_level.png b/_static/img/arch_network_diagram_high_level.png
deleted file mode 100755
index e83e7165..00000000
Binary files a/_static/img/arch_network_diagram_high_level.png and /dev/null differ
diff --git a/_static/img/network-topology-agent.jpg b/_static/img/network-topology-agent.jpg
deleted file mode 100755
index 50ba9a64..00000000
Binary files a/_static/img/network-topology-agent.jpg and /dev/null differ
diff --git a/_static/img/network-topology-ingress-egress.jpg b/_static/img/network-topology-ingress-egress.jpg
deleted file mode 100755
index 03e36e77..00000000
Binary files a/_static/img/network-topology-ingress-egress.jpg and /dev/null differ
diff --git a/_static/img/network-topology-ingress-egress.png b/_static/img/network-topology-ingress-egress.png
new file mode 100755
index 00000000..c2e55584
Binary files /dev/null and b/_static/img/network-topology-ingress-egress.png differ
diff --git a/_static/img/plano-system-architecture.png b/_static/img/plano-system-architecture.png
new file mode 100755
index 00000000..792477d5
Binary files /dev/null and b/_static/img/plano-system-architecture.png differ
diff --git a/_static/img/plano_network_diagram_high_level.png b/_static/img/plano_network_diagram_high_level.png
new file mode 100755
index 00000000..da1c5b92
Binary files /dev/null and b/_static/img/plano_network_diagram_high_level.png differ
diff --git a/_static/img/tracing.png b/_static/img/tracing.png
index 91d6a82b..bb34db91 100755
Binary files a/_static/img/tracing.png and b/_static/img/tracing.png differ
diff --git a/build_with_arch/agent.html b/build_with_arch/agent.html
deleted file mode 100755
index 58e724de..00000000
--- a/build_with_arch/agent.html
+++ /dev/null
@@ -1,432 +0,0 @@
-
-
-
-
Arch helps you build personalized agentic applications by calling application-specific (API) functions via user prompts.
-This involves any predefined functions or APIs you want to expose to users to perform tasks, gather information,
-or manipulate data. This capability is generally referred to as function calling, where
-you can support “agentic” apps tailored to specific use cases - from updating insurance claims to creating ad campaigns - via prompts.
-
Arch analyzes prompts, extracts critical information from prompts, engages in lightweight conversation with the user to
-gather any missing parameters and makes API calls so that you can focus on writing business logic. Arch does this via its
-purpose-built Arch-Function -
-the fastest (200ms p50 - 12x faser than GPT-4o) and cheapest (44x than GPT-4o) function calling LLM that matches or outperforms
-frontier LLMs.
In the most common scenario, users will request a single action via prompts, and Arch efficiently processes the
-request by extracting relevant parameters, validating the input, and calling the designated function or API. Here
-is how you would go about enabling this scenario with Arch:
-
-
Step 1: Define Prompt Targets
-
-
Prompt Target Example Configuration
-
1version:v0.1
- 2listener:
- 3address:127.0.0.1
- 4port:8080#If you configure port 443, you'll need to update the listener with tls_certificates
- 5message_format:huggingface
- 6
- 7# Centralized way to manage LLMs, manage keys, retry logic, failover and limits in a central way
- 8llm_providers:
- 9-name:OpenAI
-10provider:openai
-11access_key:$OPENAI_API_KEY
-12model:gpt-3.5-turbo
-13default:true
-14
-15# default system prompt used by all prompt targets
-16system_prompt:|
-17You are a network assistant that just offers facts; not advice on manufacturers or purchasing decisions.
-18
-19prompt_targets:
-20-name:network_qa
-21endpoint:
-22name:app_server
-23path:/agent/network_summary
-24description:Handle general Q/A related to networking.
-25default:true
-26-name:reboot_devices
-27description:Reboot specific devices or device groups
-28endpoint:
-29name:app_server
-30path:/agent/device_reboot
-31parameters:
-32-name:device_ids
-33type:list
-34description:A list of device identifiers (IDs) to reboot.
-35required:true
-36-name:device_summary
-37description:Retrieve statistics for specific devices within a time range
-38endpoint:
-39name:app_server
-40path:/agent/device_summary
-41parameters:
-42-name:device_ids
-43type:list
-44description:A list of device identifiers (IDs) to retrieve statistics for.
-45required:true# device_ids are required to get device statistics
-46-name:time_range
-47type:int
-48description:Time range in days for which to gather device statistics. Defaults to 7.
-49default:7
-50
-51# Arch creates a round-robin load balancing between different endpoints, managed via the cluster subsystem.
-52endpoints:
-53app_server:
-54# value could be ip address or a hostname with port
-55# this could also be a list of endpoints for load balancing
-56# for example endpoint: [ ip1:port, ip2:port ]
-57endpoint:host.docker.internal:18083
-58# max time to wait for a connection to be established
-59connect_timeout:0.005s
-
-
-
-
-
-
Step 2: Process Request Parameters
-
Once the prompt targets are configured as above, handling those parameters is
-
-
Parameter handling with Flask
-
1fromflaskimportFlask,request,jsonify
- 2
- 3app=Flask(__name__)
- 4
- 5
- 6@app.route("/agent/device_summary",methods=["POST"])
- 7defget_device_summary():
- 8"""
- 9 Endpoint to retrieve device statistics based on device IDs and an optional time range.
-10 """
-11data=request.get_json()
-12
-13# Validate 'device_ids' parameter
-14device_ids=data.get("device_ids")
-15ifnotdevice_idsornotisinstance(device_ids,list):
-16return(
-17jsonify({"error":"'device_ids' parameter is required and must be a list"}),
-18400,
-19)
-20
-21# Validate 'time_range' parameter (optional, defaults to 7)
-22time_range=data.get("time_range",7)
-23ifnotisinstance(time_range,int):
-24returnjsonify({"error":"'time_range' must be an integer"}),400
-25
-26# Simulate retrieving statistics for the given device IDs and time range
-27# In a real application, you would query your database or external service here
-28statistics=[]
-29fordevice_idindevice_ids:
-30# Placeholder for actual data retrieval
-31stats={
-32"device_id":device_id,
-33"time_range":f"Last {time_range} days",
-34"data":f"Statistics data for device {device_id} over the last {time_range} days.",
-35}
-36statistics.append(stats)
-37
-38response={"statistics":statistics}
-39
-40returnjsonify(response),200
-41
-42
-43if__name__=="__main__":
-44app.run(debug=True)
-
-
-
-
-
-
-
Parallel & Multiple Function Calling
-
In more complex use cases, users may request multiple actions or need multiple APIs/functions to be called
-simultaneously or sequentially. With Arch, you can handle these scenarios efficiently using parallel or multiple
-function calling. This allows your application to engage in a broader range of interactions, such as updating
-different datasets, triggering events across systems, or collecting results from multiple services in one prompt.
-
Arch-FC1B is built to manage these parallel tasks efficiently, ensuring low latency and high throughput, even
-when multiple functions are invoked. It provides two mechanisms to handle these cases:
-
-
Step 1: Define Prompt Targets
-
When enabling multiple function calling, define the prompt targets in a way that supports multiple functions or
-API calls based on the user’s prompt. These targets can be triggered in parallel or sequentially, depending on
-the user’s intent.
-
Example of Multiple Prompt Targets in YAML:
-
-
Prompt Target Example Configuration
-
1version:v0.1
- 2listener:
- 3address:127.0.0.1
- 4port:8080#If you configure port 443, you'll need to update the listener with tls_certificates
- 5message_format:huggingface
- 6
- 7# Centralized way to manage LLMs, manage keys, retry logic, failover and limits in a central way
- 8llm_providers:
- 9-name:OpenAI
-10provider:openai
-11access_key:$OPENAI_API_KEY
-12model:gpt-3.5-turbo
-13default:true
-14
-15# default system prompt used by all prompt targets
-16system_prompt:|
-17You are a network assistant that just offers facts; not advice on manufacturers or purchasing decisions.
-18
-19prompt_targets:
-20-name:network_qa
-21endpoint:
-22name:app_server
-23path:/agent/network_summary
-24description:Handle general Q/A related to networking.
-25default:true
-26-name:reboot_devices
-27description:Reboot specific devices or device groups
-28endpoint:
-29name:app_server
-30path:/agent/device_reboot
-31parameters:
-32-name:device_ids
-33type:list
-34description:A list of device identifiers (IDs) to reboot.
-35required:true
-36-name:device_summary
-37description:Retrieve statistics for specific devices within a time range
-38endpoint:
-39name:app_server
-40path:/agent/device_summary
-41parameters:
-42-name:device_ids
-43type:list
-44description:A list of device identifiers (IDs) to retrieve statistics for.
-45required:true# device_ids are required to get device statistics
-46-name:time_range
-47type:int
-48description:Time range in days for which to gather device statistics. Defaults to 7.
-49default:7
-50
-51# Arch creates a round-robin load balancing between different endpoints, managed via the cluster subsystem.
-52endpoints:
-53app_server:
-54# value could be ip address or a hostname with port
-55# this could also be a list of endpoints for load balancing
-56# for example endpoint: [ ip1:port, ip2:port ]
-57endpoint:host.docker.internal:18083
-58# max time to wait for a connection to be established
-59connect_timeout:0.005s
-
Developers often struggle to efficiently handle
-follow-up or clarification questions. Specifically, when users ask for changes or additions to previous responses, it requires developers to
-re-write prompts using LLMs with precise prompt engineering techniques. This process is slow, manual, error prone and adds latency and token cost for
-common scenarios that can be managed more efficiently.
-
Arch is highly capable of accurately detecting and processing prompts in multi-turn scenarios so that you can buil fast and accurate agents in minutes.
-Below are some cnversational examples that you can build via Arch. Each example is enriched with annotations (via ** [Arch] ** ) that illustrates how Arch
-processess conversational messages on your behalf.
-
-
Note
-
The following section assumes that you have some knowledge about the core concepts of Arch, such as prompt_targets.
-If you haven’t familizaried yourself with Arch’s concepts, we recommend you first read the tech overview section firtst.
-Additionally, the conversation examples below assume the usage of the following arch_config.yaml file.
-
-
-
Example 1: Adjusting Retrieval
-
User: What are the benefits of renewable energy?
-**[Arch]**: Check if there is an available <prompt_target> that can handle this user query.
-**[Arch]**: Found "get_info_for_energy_source" prompt_target in arch_config.yaml. Forward prompt to the endpoint configured in "get_info_for_energy_source"
-...
-Assistant: Renewable energy reduces greenhouse gas emissions, lowers air pollution, and provides sustainable power sources like solar and wind.
-
-User: Include cost considerations in the response.
-**[Arch]**: Follow-up detected. Forward prompt history to the "get_info_for_energy_source" prompt_target and post the following parameters consideration="cost"
-...
-Assistant: Renewable energy reduces greenhouse gas emissions, lowers air pollution, and provides sustainable power sources like solar and wind. While the initial setup costs can be high, long-term savings from reduced fuel expenses and government incentives make it cost-effective.
-
-
-
-
-
Example 2: Switching Intent
-
User: What are the symptoms of diabetes?
-**[Arch]**: Check if there is an available <prompt_target> that can handle this user query.
-**[Arch]**: Found "diseases_symptoms" prompt_target in arch_config.yaml. Forward disease=diabeteres to "diseases_symptoms" prompt target
-...
-Assistant: Common symptoms include frequent urination, excessive thirst, fatigue, and blurry vision.
-
-User: How is it diagnosed?
-**[Arch]**: New intent detected.
-**[Arch]**: Found "disease_diagnoses" prompt_target in arch_config.yaml. Forward disease=diabeteres to "disease_diagnoses" prompt target
-...
-Assistant: Diabetes is diagnosed through blood tests like fasting blood sugar, A1C, or an oral glucose tolerance test.
-
-
-
-
-
Build Multi-Turn RAG Apps
-
The following section describes how you can easilly add support for multi-turn scenarios via Arch. You process and manage multi-turn prompts
-just like you manage single-turn ones. Arch handles the conpleixity of detecting the correct intent based on the last user prompt and
-the covnersational history, extracts relevant parameters needed by downstream APIs, and dipatches calls to any upstream LLMs to summarize the
-response from your APIs.
-
-
Step 1: Define Arch Config
-
-
Arch Config
-
1version:v0.1
- 2listener:
- 3address:127.0.0.1
- 4port:8080#If you configure port 443, you'll need to update the listener with tls_certificates
- 5message_format:huggingface
- 6
- 7# Centralized way to manage LLMs, manage keys, retry logic, failover and limits in a central way
- 8llm_providers:
- 9-name:OpenAI
-10provider:openai
-11access_key:$OPENAI_API_KEY
-12model:gpt-3.5-turbo
-13default:true
-14
-15# default system prompt used by all prompt targets
-16system_prompt:|
-17You are a helpful assistant and can offer information about energy sources. You will get a JSON object with energy_source and consideration fields. Focus on answering using those fields
-18
-19prompt_targets:
-20-name:get_info_for_energy_source
-21description:get information about an energy source
-22parameters:
-23-name:energy_source
-24type:str
-25description:a source of energy
-26required:true
-27enum:[renewable,fossil]
-28-name:consideration
-29type:str
-30description:a specific type of consideration for an energy source
-31enum:[cost,economic,technology]
-32endpoint:
-33name:rag_energy_source_agent
-34path:/agent/energy_source_info
-35http_method:POST
-
-
-
-
-
-
Step 2: Process Request in Flask
-
Once the prompt targets are configured as above, handle parameters across multi-turn as if its a single-turn request
-
-
Parameter handling with Flask
-
1importos
- 2importgradioasgr
- 3
- 4fromfastapiimportFastAPI,HTTPException
- 5frompydanticimportBaseModel
- 6fromtypingimportOptional
- 7fromopenaiimportOpenAI
- 8fromcommonimportcreate_gradio_app
- 9
-10app=FastAPI()
-11
-12
-13# Define the request model
-14classEnergySourceRequest(BaseModel):
-15energy_source:str
-16consideration:Optional[str]=None
-17
-18
-19classEnergySourceResponse(BaseModel):
-20energy_source:str
-21consideration:Optional[str]=None
-22
-23
-24# Post method for device summary
-25@app.post("/agent/energy_source_info")
-26defget_workforce(request:EnergySourceRequest):
-27"""
-28 Endpoint to get details about energy source
-29 """
-30considertion="You don't have any specific consideration. Feel free to talk in a more open ended fashion"
-31
-32ifrequest.considerationisnotNone:
-33considertion=f"Add specific focus on the following consideration when you summarize the content for the energy source: {request.consideration}"
-34
-35response={
-36"energy_source":request.energy_source,
-37"consideration":considertion,
-38}
-39returnresponse
-
-
-
-
-
-
Demo App
-
For your convenience, we’ve built a demo app
-that you can test and modify locally for multi-turn RAG scenarios.
The following section describes how Arch can help you build faster, smarter and more accurate
-Retrieval-Augmented Generation (RAG) applications, including fast and accurate RAG in multi-turn
-converational scenarios.
-
-
What is Retrieval-Augmented Generation (RAG)?
-
RAG applications combine retrieval-based methods with generative AI models to provide more accurate,
-contextually relevant, and reliable outputs. These applications leverage external data sources to augment
-the capabilities of Large Language Models (LLMs), enabling them to retrieve and integrate specific information
-rather than relying solely on the LLM’s internal knowledge.
-
-
-
Parameter Extraction for RAG
-
To build RAG (Retrieval Augmented Generation) applications, you can configure prompt targets with parameters,
-enabling Arch to retrieve critical information in a structured way for processing. This approach improves the
-retrieval quality and speed of your application. By extracting parameters from the conversation, you can pull
-the appropriate chunks from a vector database or SQL-like data store to enhance accuracy. With Arch, you can
-streamline data retrieval and processing to build more efficient and precise RAG applications.
-
-
Step 1: Define Prompt Targets
-
-
Prompt Targets
-
1prompt_targets:
- 2-name:get_device_statistics
- 3description:Retrieve and present the relevant data based on the specified devices and time range
- 4
- 5path:/agent/device_summary
- 6parameters:
- 7-name:device_ids
- 8type:list
- 9description:A list of device identifiers (IDs) to reboot.
-10required:true
-11-name:time_range
-12type:int
-13description:The number of days in the past over which to retrieve device statistics
-14required:false
-15default:7
-
-
-
-
-
-
Step 2: Process Request Parameters in Flask
-
Once the prompt targets are configured as above, handling those parameters is
-
-
Parameter handling with Flask
-
1fromflaskimportFlask,request,jsonify
- 2
- 3app=Flask(__name__)
- 4
- 5
- 6@app.route("/agent/device_summary",methods=["POST"])
- 7defget_device_summary():
- 8"""
- 9 Endpoint to retrieve device statistics based on device IDs and an optional time range.
-10 """
-11data=request.get_json()
-12
-13# Validate 'device_ids' parameter
-14device_ids=data.get("device_ids")
-15ifnotdevice_idsornotisinstance(device_ids,list):
-16return(
-17jsonify({"error":"'device_ids' parameter is required and must be a list"}),
-18400,
-19)
-20
-21# Validate 'time_range' parameter (optional, defaults to 7)
-22time_range=data.get("time_range",7)
-23ifnotisinstance(time_range,int):
-24returnjsonify({"error":"'time_range' must be an integer"}),400
-25
-26# Simulate retrieving statistics for the given device IDs and time range
-27# In a real application, you would query your database or external service here
-28statistics=[]
-29fordevice_idindevice_ids:
-30# Placeholder for actual data retrieval
-31stats={
-32"device_id":device_id,
-33"time_range":f"Last {time_range} days",
-34"data":f"Statistics data for device {device_id} over the last {time_range} days.",
-35}
-36statistics.append(stats)
-37
-38response={"statistics":statistics}
-39
-40returnjsonify(response),200
-41
-42
-43if__name__=="__main__":
-44app.run(debug=True)
-
-
-
-
-
-
-
Multi-Turn RAG (Follow-up Questions)
-
Developers often struggle to efficiently handle
-follow-up or clarification questions. Specifically, when users ask for changes or additions to previous responses, it requires developers to
-re-write prompts using LLMs with precise prompt engineering techniques. This process is slow, manual, error prone and adds signifcant latency to the
-user experience.
-
Arch is highly capable of accurately detecting and processing prompts in a multi-turn scenarios so that you can buil fast and accurate RAG apps in
-minutes. For additional details on how to build multi-turn RAG applications please refer to our multi-turn docs.
Agents are autonomous systems that handle wide-ranging, open-ended tasks by calling models in a loop until the work is complete. Unlike deterministic prompt targets, agents have access to tools, reason about which actions to take, and adapt their behavior based on intermediate results—making them ideal for complex workflows that require multi-step reasoning, external API calls, and dynamic decision-making.
+
Plano helps developers build and scale multi-agent systems by managing the orchestration layer—deciding which agent(s) or LLM(s) should handle each request, and in what sequence—while developers focus on implementing agent logic in any language or framework they choose.
+
+
Agent Orchestration
+
Plano-Orchestrator is a family of state-of-the-art routing and orchestration models that decide which agent(s) should handle each request, and in what sequence. Built for real-world multi-agent deployments, it analyzes user intent and conversation context to make precise routing and orchestration decisions while remaining efficient enough for low-latency production use across general chat, coding, and long-context multi-turn conversations.
+
This allows development teams to:
+
+
Scale multi-agent systems: Route requests across multiple specialized agents without hardcoding routing logic in application code.
+
Improve performance: Direct requests to the most appropriate agent based on intent, reducing unnecessary handoffs and improving response quality.
+
Enhance debuggability: Centralized routing decisions are observable through Plano’s tracing and logging, making it easier to understand why a particular agent was selected.
+
+
+
+
Inner Loop vs. Outer Loop
+
Plano distinguishes between the inner loop (agent implementation logic) and the outer loop (orchestration and routing):
+
+
Inner Loop (Agent Logic)
+
The inner loop is where your agent lives—the business logic that decides which tools to call, how to interpret results, and when the task is complete. You implement this in any language or framework:
+
+
Python agents: Using frameworks like LangChain, LlamaIndex, CrewAI, or custom Python code.
+
JavaScript/TypeScript agents: Using frameworks like LangChain.js or custom Node.js implementations.
+
Any other AI framework: Agents are just HTTP services that Plano can route to.
+
+
Your agent controls:
+
+
Which tools or APIs to call in response to a prompt.
+
How to interpret tool results and decide next steps.
+
When to call the LLM for reasoning or summarization.
+
When the task is complete and what response to return.
+
+
+
Note
+
Making LLM Calls from Agents
+
When your agent needs to call an LLM for reasoning, summarization, or completion, you should route those calls through Plano’s Model Proxy rather than calling LLM providers directly. This gives you:
+
+
Consistent responses: Normalized response formats across all LLM providers, whether you’re using OpenAI, Anthropic, Azure OpenAI, or any OpenAI-compatible provider.
+
Rich agentic signals: Automatic capture of function calls, tool usage, reasoning steps, and model behavior—surfaced through traces and metrics without instrumenting your agent code.
By routing LLM calls through the Model Proxy, your agents remain decoupled from specific providers and can benefit from centralized policy enforcement, observability, and intelligent routing—all managed in the outer loop. For a step-by-step guide, see LLM Routing in the LLM Router guide.
+
+
+
+
Outer Loop (Orchestration)
+
The outer loop is Plano’s orchestration layer—it manages the lifecycle of requests across agents and LLMs:
+
+
Intent analysis: Plano-Orchestrator analyzes incoming prompts to determine user intent and conversation context.
+
Routing decisions: Routes requests to the appropriate agent(s) or LLM(s) based on capabilities, context, and availability.
+
Sequencing: Determines whether multiple agents need to collaborate and in what order.
+
Lifecycle management: Handles retries, failover, circuit breaking, and load balancing across agent instances.
+
+
By managing the outer loop, Plano allows you to:
+
+
Add new agents without changing routing logic in existing agents.
+
Run multiple versions or variants of agents for A/B testing or canary deployments.
Filter chains are Plano’s way of capturing reusable workflow steps in the dataplane, without duplication and coupling logic into application code. A filter chain is an ordered list of mutations that a request flows through before reaching its final destination —such as an agent, an LLM, or a tool backend. Each filter is a network-addressable service/path that can:
+
+
Inspect the incoming prompt, metadata, and conversation state.
+
Mutate or enrich the request (for example, rewrite queries or build context).
+
Short-circuit the flow and return a response early (for example, block a request on a compliance failure).
+
Emit structured logs and traces so you can debug and continuously improve your agents.
+
+
In other words, filter chains provide a lightweight programming model over HTTP for building reusable steps
+in your agent architectures.
+
+
Typical Use Cases
+
Without a dataplane programming model, teams tend to spread logic like query rewriting, compliance checks,
+context building, and routing decisions across many agents and frameworks. This quickly becomes hard to reason
+about and even harder to evolve.
+
Filter chains show up most often in patterns like:
+
+
Guardrails and Compliance: Enforcing content policies, stripping or masking sensitive data, and blocking obviously unsafe or off-topic requests before they reach an agent.
+
Query rewriting, RAG, and Memory: Rewriting user queries for retrieval, normalizing entities, and assembling RAG context envelopes while pulling in relevant memory (for example, conversation history, user profiles, or prior tool results) before calling a model or tool.
+
Cross-cutting Observability: Injecting correlation IDs, sampling traces, or logging enriched request metadata at consistent points in the request path.
+
+
Because these behaviors live in the dataplane rather than inside individual agents, you define them once, attach them to many agents and prompt targets, and can add, remove, or reorder them without changing application code.
+
+
+
Configuration example
+
The example below shows a configuration where an agent uses a filter chain with two filters: a query rewriter,
+and a context builder that prepares retrieval context before the agent runs.
The filters section defines the reusable filters, each running as its own HTTP/MCP service.
+
The listeners section wires the rag_agent behind an agent listener and attaches a filter_chain with query_rewriter followed by context_builder.
+
When a request arrives at agent_1, Plano executes the filters in order before handing control to rag_agent.
+
+
+
+
Filter Chain Programming Model (HTTP and MCP)
+
Filters are network services reachable over HTTP. By default, Plano treats each filter as a Model Context Protocol (MCP) service, which makes it easy to write filters in any language; alternatively, you can configure a filter as a plain RESTful HTTP endpoint.
+
When defining a filter in Plano configuration, the following fields are optional:
+
+
type: Controls the filter runtime. Use mcp for Model Context Protocol filters, or http for plain HTTP filters. Defaults to mcp.
+
transport: Controls how Plano talks to the filter (defaults to streamable-http for efficient streaming interactions over HTTP). You can omit this for standard HTTP transport.
+
tool: Names the MCP tool Plano will invoke (by default, the filter id). You can omit this if the tool name matches your filter id.
+
+
In practice, you typically only need to specify id and url to get started. Plano’s sensible defaults mean a filter can be as simple as an HTTP endpoint. If you want to customize the runtime or protocol, those fields are there, but they’re optional.
+
Filters communicate the outcome of their work via HTTP status codes:
+
+
HTTP 200 (Success): The filter successfully processed the request. If the filter mutated the request (e.g., rewrote a query or enriched context), those mutations are passed downstream.
+
HTTP 4xx (User Error): The request violates a filter’s rules or constraints—for example, content moderation policies or compliance checks. The request is terminated, and the error is returned to the caller. This is not a fatal error; it represents expected user-facing policy enforcement.
+
HTTP 5xx (Fatal Error): An unexpected failure in the filter itself (for example, a crash or misconfiguration). Plano will surface the error back to the caller and record it in logs and traces.
+
+
These semantics allow filters to enforce guardrails and policies (4xx) without blocking the entire system, while still surfacing critical failures (5xx) for investigation.
+
If any filter fails or decides to terminate the request early (for example, after a policy violation), Plano will
+surface that outcome back to the caller and record it in logs and traces. This makes filter chains a safe and
+powerful abstraction for evolving your agent workflows over time.
+
+
+
+
+
+
+
\ No newline at end of file
diff --git a/concepts/tech_overview/listener.html b/concepts/listeners.html
similarity index 50%
rename from concepts/tech_overview/listener.html
rename to concepts/listeners.html
index 74079e48..a979454f 100755
--- a/concepts/tech_overview/listener.html
+++ b/concepts/listeners.html
@@ -1,25 +1,25 @@
-
+
-Listener | Arch Docs v0.3.22
-
-
-
-
-
-
-
-
-
-
-
-
+Listeners | Plano Docs v0.4
+
+
+
+
+
+
+
+
+
+
+
+
+
@@ -152,77 +147,102 @@
-
-
Listener
-
Listener is a top level primitive in Arch, which simplifies the configuration required to bind incoming
-connections from downstream clients, and for egress connections to LLMs (hosted or API)
-
Arch builds on Envoy’s Listener subsystem to streamline connection management for developers. Arch minimizes
-the complexity of Envoy’s listener setup by using best-practices and exposing only essential settings,
-making it easier for developers to bind connections without deep knowledge of Envoy’s configuration model. This
-simplification ensures that connections are secure, reliable, and optimized for performance.
-
-
Downstream (Ingress)
-
Developers can configure Arch to accept connections from downstream clients. A downstream listener acts as the
-primary entry point for incoming traffic, handling initial connection setup, including network filtering, guardrails,
-and additional network security checks. For more details on prompt security and safety,
-see here.
+
+
Listeners
+
Listeners are a top-level primitive in Plano that bind network traffic to the dataplane. They simplify the
+configuration required to accept incoming connections from downstream clients (edge) and to expose a unified egress
+endpoint for calls from your applications to upstream LLMs.
+
Plano builds on Envoy’s Listener subsystem to streamline connection management for developers. It hides most of
+Envoy’s complexity behind sensible defaults and a focused configuration surface, so you can bind listeners without
+deep knowledge of Envoy’s configuration model while still getting secure, reliable, and performant connections.
+
Listeners are modular building blocks: you can configure only inbound listeners (for edge proxying and guardrails),
+only outbound/model-proxy listeners (for LLM routing from your services), or both together. This lets you fit Plano
+cleanly into existing architectures, whether you need it at the edge, behind the firewall, or across the full
+request path.
+
+
Network Topology
+
The diagram below shows how inbound and outbound traffic flow through Plano and how listeners relate to agents,
+prompt targets, and upstream LLMs:
Arch automatically configures a listener to route requests from your application to upstream LLM API providers (or hosts).
-When you start Arch, it creates a listener for egress traffic based on the presence of the listener configuration
-section in the configuration file. Arch binds itself to a local address such as 127.0.0.1:12000/v1 or a DNS-based
-address like arch.local:12000/v1 for outgoing traffic. For more details on LLM providers, read here.
+
+
Inbound (Agent & Prompt Target)
+
Developers configure inbound listeners to accept connections from clients such as web frontends, backend
+services, or other gateways. An inbound listener acts as the primary entry point for prompt traffic, handling
+initial connection setup, TLS termination, guardrails, and forwarding incoming traffic to the appropriate prompt
+targets or agents.
+
There are two primary types of inbound connections exposed via listeners:
+
+
Agent Inbound (Edge): Clients (web/mobile apps or other services) connect to Plano, send prompts, and receive
+responses. This is typically your public/edge listener where Plano applies guardrails, routing, and orchestration
+before returning results to the caller.
+
Prompt Target Inbound (Edge): Your application server calls Plano’s internal listener targeting
+prompt targets that can invoke tools and LLMs directly on its behalf.
+
+
Inbound listeners are where you attach Filter Chains so that safety and context-building happen
+consistently at the edge.
-
-
Configure Listener
-
To configure a Downstream (Ingress) Listener, simply add the listener directive to your configuration file:
+
+
Outbound (Model Proxy & Egress)
+
Plano also exposes an egress listener that your applications call when sending requests to upstream LLM providers
+or self-hosted models. From your application’s perspective this looks like a single OpenAI-compatible HTTP endpoint
+(for example, http://127.0.0.1:12000/v1), while Plano handles provider selection, retries, and failover behind
+the scenes.
+
Under the hood, Plano opens outbound HTTP(S) connections to upstream LLM providers using its unified API surface and
+smart model routing. For more details on how Plano talks to models and how providers are configured, see
+LLM providers.
+
+
+
Configure Listeners
+
Listeners are configured via the listeners block in your Plano configuration. You can define one or more inbound
+listeners (for example, type:edge) or one or more outbound/model listeners (for example, type:model), or both
+in the same deployment.
+
To configure an inbound (edge) listener, add a listeners block to your configuration file and define at least one
+listener with address, port, and protocol details:
Example Configuration
-
1version:v0.1.0
+
1version:v0.2.0 2 3listeners: 4ingress_traffic: 5address:0.0.0.0 6port:10000
- 7message_format:openai
- 8timeout:30s
- 9
-10# Centralized way to manage LLMs, manage keys, retry logic, failover and limits in a central way
-11llm_providers:
-12-access_key:$OPENAI_API_KEY
-13model:openai/gpt-4o
-14default:true
-15
-16# default system prompt used by all prompt targets
-17system_prompt:You are a network assistant that just offers facts; not advice on manufacturers or purchasing decisions.
+ 7
+ 8# Centralized way to manage LLMs, manage keys, retry logic, failover and limits in a central way
+ 9model_providers:
+10-access_key:$OPENAI_API_KEY
+11model:openai/gpt-4o
+12default:true
+
When you start Plano, you specify a listener address/port that you want to bind downstream. Plano also exposes a
+predefined internal listener (127.0.0.1:12000) that you can use to proxy egress calls originating from your
+application to LLMs (API-based or hosted) via prompt targets.