# plano/docs/source/_config/prompt-config-full-reference.yml

# Version label for this configuration; quoted so it is always read as a string.
version: "0.1-beta"

# Listener settings: bind address, port, message format, and TLS certificate file paths.
listener:
  address: 0.0.0.0  # or 127.0.0.1
  port_value: 8080
  messages: "hugging-face-messages-json" # Defines how Arch should parse the content of an application/json or text/plain Content-Type in the HTTP request
  common_tls_context: # If you configure port 443, you'll need to update the listener with your TLS certificates
    tls_certificates:
      - certificate_chain:
          filename: "/etc/arch/certs/cert.pem"
        private_key:
          filename: "/etc/arch/certs/key.pem"

# Named system prompts. `content` is a literal block scalar (|), so its line breaks are kept as written.
system_prompts:
  - name: "network_assistant"
    content: |
      You are a network assistant that just offers facts; not advice on manufacturers or purchasing decisions.

llm_providers: # Central place to manage LLMs: access keys, retry logic, failover, and limits
  - name: "OpenAI"
    # NOTE(review): `$NAME` looks like an environment-variable reference — confirm how it is expanded.
    access_key: $OPENAI_API_KEY
    model: gpt-4o
    default: true
    stream: true
    rate_limit:
      selector: # Optional; adds rate limiting keyed on HTTP headers such as JWT tokens or API keys
        http-header:
          name: "Authorization"
          value: "" # An empty value means each distinct header value gets its own separate limit
      limit:
        tokens: 100000  # Tokens per unit
        unit: "minute"
  - name: "Mistral"
    access_key: $MISTRAL_API_KEY
    model: "mistral-7B"

prompt_endpoints: # Arch round-robin load-balances across these endpoints, managed via the cluster subsystem.
  - "http://127.0.0.2" # Assumes port 8000 unless a port is given explicitly (e.g. :5000)
  - "http://127.0.0.1:5000"

# Guards applied to input; each entry names a guard and an `on_exception` action.
prompt_guards:
  input_guard:
    - name: "jailbreak"
      on_exception:
        # NOTE(review): presumably routes the request to the top-level `error_target` — confirm.
        forward_to_error_target: true
    - name: "toxicity"
      on_exception:
        # NOTE(review): presumably the fixed reply returned when this guard trips — confirm.
        message: "Looks like you're curious about my abilities, but I can only provide assistance within my programmed parameters."

# Prompt targets: each has a name, a path, and a description; a target may also declare typed parameters.
prompt_targets:
  - name: "information_extraction"
    type: "default"
    description: "This prompt handles all scenarios that are question and answer in nature. Like summarization, information extraction, etc."
    path: "/agent/summary"
    auto-llm-dispatch-on-response: true # Arch uses the default LLM and treats the response from the endpoint as the prompt to send to the LLM

  - name: "reboot_network_device"
    path: "/agent/action"
    description: "Helps network operators perform device operations like rebooting a device."
    parameters:
      - name: "device_id"
        type: "string" # additional type options include: integer | float | list | dictionary | set
        description: "Identifier of the network device to reboot."
        # NOTE(review): a default on a parameter that is also `required: true` — confirm whether it is ever used.
        default_value: ""
        required: true
      - name: "confirmation"
        type: "integer" # additional type options include: string | float | list | dictionary | set
        description: "Confirmation flag to proceed with reboot."
        required: true

# Named error target and its path.
# NOTE(review): presumably where guards with `forward_to_error_target: true` send requests — confirm.
error_target:
  name: "error_handler"
  path: "/errors"

# NOTE(review): the unit of the sampling rate (presumably a percentage) is not stated here — confirm.
tracing: 100 # Sampling rate. Note: by default Arch works with OpenTelemetry-compatible tracing.

intent-detection-threshold-override: 0.60 # By default Arch uses an NLI + embedding approach to match an incoming prompt to a prompt target.
                                          # The intent-matching threshold defaults to 0.80; you can override this behavior if you would like.