Patch for demos/hr_agent/arch_config.yaml (unified diff; the text above the
"diff --git" line is commentary and is skipped by patch tools).

Summary of the change, as shown by the hunks below:
  * moves the `endpoints` section (app_server) from the end of the file to
    just above `system_prompt`, with its content unchanged;
  * removes the `ratelimits` section;
  * re-emits two `description` lines under the prompt target parameters.

NOTE(review): the line structure and YAML indentation were reconstructed from
a whitespace-collapsed copy, guided by the hunk line counts. The two
`description` changes show identical visible text, so they are presumably
whitespace-only (e.g. trailing spaces removed); the removed lines' original
trailing whitespace could not be recovered - confirm against the repository
before applying.

diff --git a/demos/hr_agent/arch_config.yaml b/demos/hr_agent/arch_config.yaml
index a7d75d72..fa6a499b 100644
--- a/demos/hr_agent/arch_config.yaml
+++ b/demos/hr_agent/arch_config.yaml
@@ -12,6 +12,16 @@ llm_providers:
     model: gpt-4o
     default: true
 
+# Arch creates a round-robin load balancing between different endpoints, managed via the cluster subsystem.
+endpoints:
+  app_server:
+    # value could be ip address or a hostname with port
+    # this could also be a list of endpoints for load balancing
+    # for example endpoint: [ ip1:port, ip2:port ]
+    endpoint: host.docker.internal:18083
+    # max time to wait for a connection to be established
+    connect_timeout: 0.005s
+
 # default system prompt used by all prompt targets
 system_prompt: |
   You are a HR agent assistant that helps HR decision makers with reporting and workfoce planning. Nothing else. Please stay on topic of HR.
@@ -25,34 +35,15 @@ prompt_targets:
     parameters:
       - name: staffing_type
         type: str
-        description: The staffing type like contract, fte or agency
+        description: The staffing type like contract, fte or agency
         required: true
       - name: region
         type: str
         required: true
-        description: the geographical region for which you want headcount data.
+        description: the geographical region for which you want headcount data.
   - name: hr_qa
     endpoint:
       name: app_server
       path: /agent/hr_qa
     description: Handle general Q/A related to HR.
     default: true
-
-# Arch creates a round-robin load balancing between different endpoints, managed via the cluster subsystem.
-endpoints:
-  app_server:
-    # value could be ip address or a hostname with port
-    # this could also be a list of endpoints for load balancing
-    # for example endpoint: [ ip1:port, ip2:port ]
-    endpoint: host.docker.internal:18083
-    # max time to wait for a connection to be established
-    connect_timeout: 0.005s
-
-ratelimits:
-  - model: gpt-4
-    selector:
-      key: selector-key
-      value: selector-value
-    limit:
-      tokens: 1
-      unit: minute