diff --git a/docs/source/resources/includes/arch_config_full_reference.yaml b/docs/source/resources/includes/arch_config_full_reference.yaml index a957d01c..514cf9e7 100644 --- a/docs/source/resources/includes/arch_config_full_reference.yaml +++ b/docs/source/resources/includes/arch_config_full_reference.yaml @@ -1,94 +1,75 @@ -version: v0.1 +# Arch Gateway configuration version +version: v0.3.0 +# External HTTP agents - API type is controlled by request path (/v1/responses, /v1/messages, /v1/chat/completions) +agents: + - id: weather_agent + url: http://host.docker.internal:10510 + + - id: flight_agent + url: http://host.docker.internal:10520 + +# MCP filters applied to requests/responses (e.g., input validation, query rewriting) +filters: + - id: input_guards + url: http://host.docker.internal:10500 + # type: mcp (default) + # transport: streamable-http (default) + # tool: input_guards (default - same as filter id) + +# LLM provider configurations with API keys and model routing +model_providers: + - model: openai/gpt-4o + access_key: $OPENAI_API_KEY + default: true + + - model: openai/gpt-4o-mini + access_key: $OPENAI_API_KEY + + - model: anthropic/claude-sonnet-4-0 + access_key: $ANTHROPIC_API_KEY + + - model: mistral/ministral-3b-latest + access_key: $MISTRAL_API_KEY + +# Model aliases - use friendly names instead of full provider model names +model_aliases: + fast-llm: + target: gpt-4o-mini + + smart-llm: + target: gpt-4o + +# HTTP listeners - entry points for agent routing and direct LLM access listeners: - ingress_traffic: + # Agent listener for routing requests to multiple agents + - type: agent + name: travel_booking_service + port: 8001 + router: plano_orchestrator_v1 address: 0.0.0.0 - port: 10000 - message_format: openai - timeout: 5s - egress_traffic: + agents: + - id: rag_agent + description: virtual assistant for retrieval augmented generation tasks + filter_chain: + - input_guards + + # Model listener for direct LLM access + - type: model + name: model_1 address: 0.0.0.0 port: 12000 - message_format: openai - timeout: 5s -# Arch creates a round-robin load balancing between different endpoints, managed via the cluster subsystem. +# Reusable service endpoints endpoints: app_server: - # value could be ip address or a hostname with port - # this could also be a list of endpoints for load balancing - # for example endpoint: [ ip1:port, ip2:port ] endpoint: 127.0.0.1:80 - # max time to wait for a connection to be established connect_timeout: 0.005s mistral_local: endpoint: 127.0.0.1:8001 - error_target: - endpoint: error_target_1 - -# Centralized way to manage LLMs, manage keys, retry logic, failover and limits in a central way -llm_providers: - - name: openai/gpt-4o - access_key: $OPENAI_API_KEY - model: openai/gpt-4o - default: true - - - access_key: $MISTRAL_API_KEY - model: mistral/mistral-8x7b - - - model: mistral/mistral-7b-instruct - base_url: http://mistral_local - -# Model aliases - friendly names that map to actual provider names -model_aliases: - # Alias for summarization tasks -> fast/cheap model - arch.summarize.v1: - target: gpt-4o - - # Alias for general purpose tasks -> latest model - arch.v1: - target: mistral-8x7b - -# provides a way to override default settings for the arch system -overrides: - # By default Arch uses an NLI + embedding approach to match an incoming prompt to a prompt target. - # The intent matching threshold is kept at 0.80, you can override this behavior if you would like - prompt_target_intent_matching_threshold: 0.60 - -# default system prompt used by all prompt targets -system_prompt: You are a network assistant that just offers facts; not advice on manufacturers or purchasing decisions. - -prompt_targets: - - name: information_extraction - default: true - description: handel all scenarios that are question and answer in nature. Like summarization, information extraction, etc. - endpoint: - name: app_server - path: /agent/summary - http_method: POST - # Arch uses the default LLM and treats the response from the endpoint as the prompt to send to the LLM - auto_llm_dispatch_on_response: true - # override system prompt for this prompt target - system_prompt: You are a helpful information extraction assistant. Use the information that is provided to you. - - - name: reboot_network_device - description: Reboot a specific network device - endpoint: - name: app_server - path: /agent/action - parameters: - - name: device_id - type: str - description: Identifier of the network device to reboot. - required: true - - name: confirmation - type: bool - description: Confirmation flag to proceed with reboot. - default: false - enum: [true, false] - +# OpenTelemetry tracing configuration tracing: - # sampling rate. Note by default Arch works on OpenTelemetry compatible tracing. - sampling_rate: 0.1 + # Random sampling percentage (1-100) + random_sampling: 100 diff --git a/docs/source/resources/includes/arch_config_full_reference_rendered.yaml b/docs/source/resources/includes/arch_config_full_reference_rendered.yaml index 7a3473d9..ccb695f5 100644 --- a/docs/source/resources/includes/arch_config_full_reference_rendered.yaml +++ b/docs/source/resources/includes/arch_config_full_reference_rendered.yaml @@ -1,15 +1,46 @@ +agents: +- id: weather_agent + url: http://host.docker.internal:10510 +- id: flight_agent + url: http://host.docker.internal:10520 endpoints: app_server: connect_timeout: 0.005s endpoint: 127.0.0.1 port: 80 - error_target: - endpoint: error_target_1 - port: 80 + flight_agent: + endpoint: host.docker.internal + port: 10520 + protocol: http + input_guards: + endpoint: host.docker.internal + port: 10500 + protocol: http mistral_local: endpoint: 127.0.0.1 port: 8001 + weather_agent: + endpoint: host.docker.internal + port: 10510 + protocol: http +filters: +- id: input_guards + url: http://host.docker.internal:10500 listeners: +- address: 0.0.0.0 + agents: + - description: virtual assistant for retrieval augmented generation tasks + filter_chain: + - input_guards + id: rag_agent + name: travel_booking_service + port: 8001 + router: plano_orchestrator_v1 + type: agent +- address: 0.0.0.0 + name: model_1 + port: 12000 + type: model - address: 0.0.0.0 model_providers: - access_key: $OPENAI_API_KEY @@ -17,49 +48,44 @@ listeners: model: gpt-4o name: openai/gpt-4o provider_interface: openai + - access_key: $OPENAI_API_KEY + model: gpt-4o-mini + name: openai/gpt-4o-mini + provider_interface: openai + - access_key: $ANTHROPIC_API_KEY + model: claude-sonnet-4-0 + name: anthropic/claude-sonnet-4-0 + provider_interface: anthropic - access_key: $MISTRAL_API_KEY - model: mistral-8x7b - name: mistral/mistral-8x7b - provider_interface: mistral - - base_url: http://mistral_local - cluster_name: mistral_mistral_local - endpoint: mistral_local - model: mistral-7b-instruct - name: mistral/mistral-7b-instruct - port: 80 - protocol: http + model: ministral-3b-latest + name: mistral/ministral-3b-latest provider_interface: mistral name: egress_traffic port: 12000 - timeout: 5s + timeout: 30s type: model_listener -- address: 0.0.0.0 - name: ingress_traffic - port: 10000 - timeout: 5s - type: prompt_listener model_aliases: - arch.summarize.v1: + fast-llm: + target: gpt-4o-mini + smart-llm: target: gpt-4o - arch.v1: - target: mistral-8x7b model_providers: - access_key: $OPENAI_API_KEY default: true model: gpt-4o name: openai/gpt-4o provider_interface: openai +- access_key: $OPENAI_API_KEY + model: gpt-4o-mini + name: openai/gpt-4o-mini + provider_interface: openai +- access_key: $ANTHROPIC_API_KEY + model: claude-sonnet-4-0 + name: anthropic/claude-sonnet-4-0 + provider_interface: anthropic - access_key: $MISTRAL_API_KEY - model: mistral-8x7b - name: mistral/mistral-8x7b - provider_interface: mistral -- base_url: http://mistral_local - cluster_name: mistral_mistral_local - endpoint: mistral_local - model: mistral-7b-instruct - name: mistral/mistral-7b-instruct - port: 80 - protocol: http + model: ministral-3b-latest + name: mistral/ministral-3b-latest provider_interface: mistral - model: Arch-Function name: arch-function @@ -67,39 +93,6 @@ model_providers: - model: Plano-Orchestrator name: plano-orchestrator provider_interface: arch -overrides: - prompt_target_intent_matching_threshold: 0.6 -prompt_targets: -- auto_llm_dispatch_on_response: true - default: true - description: handel all scenarios that are question and answer in nature. Like summarization, - information extraction, etc. - endpoint: - http_method: POST - name: app_server - path: /agent/summary - name: information_extraction - system_prompt: You are a helpful information extraction assistant. Use the information - that is provided to you. -- description: Reboot a specific network device - endpoint: - name: app_server - path: /agent/action - name: reboot_network_device - parameters: - - description: Identifier of the network device to reboot. - name: device_id - required: true - type: str - - default: false - description: Confirmation flag to proceed with reboot. - enum: - - true - - false - name: confirmation - type: bool -system_prompt: You are a network assistant that just offers facts; not advice on manufacturers - or purchasing decisions. tracing: - sampling_rate: 0.1 -version: v0.1 + random_sampling: 100 +version: v0.3.0