plano/docs/source/resources/includes/arch_config_full_reference.yaml
Salman Paracha 8d0b468345
draft commit to add support for xAI, TogetherAI, AzureOpenAI (#570)
* draft commit to add support for xAI, LambdaAI, TogetherAI, AzureOpenAI

* fixing failing tests and updating rendered config file

* Update arch_config_with_aliases.yaml

* adding the AZURE_API_KEY to the GH workflow for e2e

* fixing GH secrets

* adding validation for azure_openai

---------

Co-authored-by: Salman Paracha <salmanparacha@MacBook-Pro-167.local>
2025-09-18 18:36:30 -07:00

100 lines
3.1 KiB
YAML

version: v0.1

# Listener subsystem: one listener for inbound (ingress) traffic and one for
# outbound (egress) traffic to upstream LLM providers.
listeners:
  ingress_traffic:
    address: 0.0.0.0
    port: 10000
    # wire format for requests/responses on this listener
    message_format: openai
    timeout: 5s
  egress_traffic:
    address: 0.0.0.0
    port: 12000
    message_format: openai
    timeout: 5s
# Arch creates a round-robin load balancing between different endpoints, managed via the cluster subsystem.
endpoints:
  app_server:
    # value could be ip address or a hostname with port
    # this could also be a list of endpoints for load balancing
    # for example endpoint: [ ip1:port, ip2:port ]
    endpoint: 127.0.0.1:80
    # max time to wait for a connection to be established
    connect_timeout: 0.005s
  mistral_local:
    endpoint: 127.0.0.1:8001
  error_target:
    endpoint: error_target_1
# Centralized way to manage LLMs, manage keys, retry logic, failover and limits in a central way
llm_providers:
  # default provider used when no explicit model/alias is requested
  - name: openai/gpt-4o
    access_key: $OPENAI_API_KEY
    model: openai/gpt-4o
    default: true
  - access_key: $MISTRAL_API_KEY
    model: mistral/mistral-8x7b
  # locally-hosted model, routed via the mistral_local endpoint cluster
  - model: mistral/mistral-7b-instruct
    base_url: http://mistral_local
# Model aliases - friendly names that map to actual provider names
model_aliases:
  # Alias for summarization tasks -> fast/cheap model
  arch.summarize.v1:
    target: gpt-4o
  # Alias for general purpose tasks -> latest model
  arch.v1:
    target: mistral-8x7b
# provides a way to override default settings for the arch system
overrides:
  # By default Arch uses an NLI + embedding approach to match an incoming prompt to a prompt target.
  # The intent matching threshold is kept at 0.80, you can override this behavior if you would like
  prompt_target_intent_matching_threshold: 0.60
# default system prompt used by all prompt targets
# NOTE(review): placed at top level (shared default for all prompt targets);
# confirm against the arch config schema if it instead belongs under overrides.
system_prompt: You are a network assistant that just offers facts; not advice on manufacturers or purchasing decisions.
# Guardrails applied to incoming prompts before they reach a prompt target.
prompt_guards:
  input_guards:
    jailbreak:
      # response returned to the caller when the jailbreak guard trips
      on_exception:
        message: Looks like you're curious about my abilities, but I can only provide assistance within my programmed parameters.
# Prompt targets: intents that incoming prompts are matched against, each
# backed by an application endpoint.
prompt_targets:
  - name: information_extraction
    # fallback target when no other target matches the incoming prompt
    default: true
    # fixed typo: "handel" -> "handle" (this text is used for intent matching)
    description: handle all scenarios that are question and answer in nature. Like summarization, information extraction, etc.
    endpoint:
      name: app_server
      path: /agent/summary
      http_method: POST
    # Arch uses the default LLM and treats the response from the endpoint as the prompt to send to the LLM
    auto_llm_dispatch_on_response: true
    # override system prompt for this prompt target
    system_prompt: You are a helpful information extraction assistant. Use the information that is provided to you.
  - name: reboot_network_device
    description: Reboot a specific network device
    endpoint:
      name: app_server
      path: /agent/action
    # parameters Arch extracts from the prompt and forwards to the endpoint
    parameters:
      - name: device_id
        type: str
        description: Identifier of the network device to reboot.
        required: true
      - name: confirmation
        type: bool
        description: Confirmation flag to proceed with reboot.
        default: false
        enum: [true, false]
tracing:
  # sampling rate. Note by default Arch works on OpenTelemetry compatible tracing.
  sampling_rate: 0.1