# Mirror of https://github.com/katanemo/plano.git
# Synced 2026-04-25 08:46:24 +02:00
# 85 lines, 2.1 KiB, YAML
# Config schema version for the arch gateway.
version: "0.1-beta"
# Inbound listener for prompts hitting the gateway.
listener:
  address: 0.0.0.0
  port: 10000
  message_format: huggingface
  connect_timeout: 0.005s
# Upstream application endpoints that prompt targets dispatch to.
endpoints:
  api_server:
    endpoint: host.docker.internal:18083
    connect_timeout: 0.005s
overrides:
  # confidence threshold for prompt target intent matching
  prompt_target_intent_matching_threshold: 0.6
# LLM backends the gateway can route to; `default: true` marks the fallback model.
llm_providers:
  - name: gpt-4o
    access_key: OPENAI_API_KEY
    provider: openai
    model: gpt-4o
    default: true
# Global system prompt applied to LLM calls.
system_prompt: |
  You are a helpful assistant.
# Prompt targets: intents the gateway matches incoming prompts against,
# each dispatching to an upstream endpoint with extracted parameters.
prompt_targets:
  - name: weather_forecast
    description: Check weather information for a given city.
    parameters:
      - name: city
        description: the name of the city
        required: true
        type: str
      - name: days
        description: the number of days
        type: int
        required: true
      - name: units
        description: the temperature unit, e.g., Celsius and Fahrenheit
        type: str
        default: Fahrenheit
    endpoint:
      name: api_server
      path: /weather

  - name: insurance_claim_details
    description: Get the details of the insurance claim for a given policy number
    parameters:
      - name: policy_number
        type: str
        description: the policy number for the insurance claim
        required: true
      - name: include_expired
        description: indicate whether to include expired insurance claims
        type: bool
        required: true
    endpoint:
      name: api_server
      path: /insurance_claim_details

  - name: default_target
    default: true
    description: This is the default target for all unmatched prompts.
    endpoint:
      name: api_server
      path: /default_target
    system_prompt: |
      You are a helpful assistant. Use the information that is provided to you.
    # if false, arch sends the response it received from this prompt target straight to the user
    # if true, arch forwards the response to the default LLM
    auto_llm_dispatch_on_response: true
# Per-model token rate limits, keyed by a request selector.
ratelimits:
  - model: gpt-4
    selector:
      key: selector-key
      value: selector-value
    limit:
      tokens: 1
      unit: minute
# Distributed-tracing settings; sample 100% of requests.
tracing:
  random_sampling: 100