2024-09-20 17:08:42 -07:00
|
|
|
# Quoted so the value is always loaded as a string.
version: "0.1-beta"
|
|
|
|
|
|
|
|
|
|
listener:
  address: 0.0.0.0  # or 127.0.0.1
  port: 10000
  # Defines how Arch should parse request content sent with an
  # application/json or text/plain Content-Type.
  message_format: huggingface
  # If you configure port 443, you'll need to update the listener with your
  # TLS certificates.
  common_tls_context:
    tls_certificates:
      - certificate_chain:
          filename: "/etc/certs/cert.pem"
        private_key:
          filename: "/etc/certs/key.pem"
|
2024-09-20 17:08:42 -07:00
|
|
|
|
2024-09-30 17:49:05 -07:00
|
|
|
# Arch creates round-robin load balancing between different endpoints,
# managed via the cluster subsystem.
endpoints:
  app_server:
    # The value can be an IP address or a hostname, with a port. It can also
    # be a list of endpoints for load balancing, for example:
    #   endpoint: [ip1:port, ip2:port]
    endpoint: "127.0.0.1:80"
    # Maximum time to wait for a connection to be established.
    connect_timeout: 500ms
    # Maximum time to wait for a response.
    timeout: 10000ms

  mistral_local:
    endpoint: "127.0.0.1:8001"

  error_target:
    # NOTE(review): no port is given here, unlike the endpoints above — confirm.
    endpoint: "error_target_1"
|
|
|
|
|
|
|
|
|
|
# Central place to manage LLMs: keys, retry logic, failover and limits.
llm_providers:
  - name: "OpenAI"
    access_key: $OPENAI_API_KEY
    model: gpt-4o
    default: true
    stream: true
    rate_limits:
      # Optional: rate limit based on HTTP headers such as JWT tokens or
      # API keys.
      selector:
        http_header:
          name: "Authorization"
          # An empty value means each separate value has a separate limit.
          value: ""
      limit:
        # Tokens per unit.
        tokens: 100000
        unit: "minute"

  - name: "Mistral8x7b"
    access_key: $MISTRAL_API_KEY
    model: "mistral-8x7b"

  - name: "MistralLocal7b"
    model: "mistral-7b-instruct"
    # Presumably refers to the mistral_local entry under endpoints — confirm.
    endpoint: "mistral_local"
|
2024-09-20 17:08:42 -07:00
|
|
|
|
2024-09-30 17:49:05 -07:00
|
|
|
# Provides a way to override default settings for the Arch system.
overrides:
  # By default Arch uses an NLI + embedding approach to match an incoming
  # prompt to a prompt target. The intent matching threshold is kept at 0.80;
  # you can override this behavior here if you would like.
  prompt_target_intent_matching_threshold: 0.60
|
|
|
|
|
|
|
|
|
|
# Default system prompt used by all prompt targets.
# Literal block scalar: the value keeps a single trailing newline.
system_prompt: |
  You are a network assistant that just offers facts; not advice on manufacturers or purchasing decisions.
|
2024-09-20 17:08:42 -07:00
|
|
|
|
|
|
|
|
prompt_guards:
  input_guards:
    jailbreak:
      on_exception:
        # Message configured for the jailbreak guard's on_exception entry.
        message: "Looks like you're curious about my abilities, but I can only provide assistance within my programmed parameters."
|
|
|
|
|
|
|
|
|
|
prompt_targets:
  - name: "reboot_network_device"
    description: "Helps network operators perform device operations like rebooting a device."
    endpoint:
      name: app_server
      path: "/agent/action"
    parameters:
      - name: "device_id"
        # Type options include: int | float | bool | string | list | dict
        type: "string"
        description: "Identifier of the network device to reboot."
        required: true
      - name: "confirmation"
        type: "string"
        description: "Confirmation flag to proceed with reboot."
        default: "no"
        # Quoted on purpose: YAML 1.1 parsers load bare yes/no as booleans,
        # which would not match the string type and the "no" default above.
        enum: ["yes", "no"]

  - name: "information_extraction"
    default: true
    description: "This prompt handles all scenarios that are question and answer in nature. Like summarization, information extraction, etc."
    endpoint:
      name: app_server
      path: "/agent/summary"
      # NOTE(review): HTTP methods are conventionally upper-case (POST) —
      # confirm the consumer accepts this casing.
      method: Post
    # Arch uses the default LLM and treats the response from the endpoint as
    # the prompt to send to the LLM.
    auto_llm_dispatch_on_response: true
    # Overrides the system prompt for this prompt target.
    system_prompt: |
      You are a helpful information extraction assistant. Use the information that is provided to you.
|
2024-09-20 17:08:42 -07:00
|
|
|
|
|
|
|
|
error_target:
  endpoint:
    # NOTE(review): the endpoints section defines a key named error_target
    # (whose endpoint value is "error_target_1"); confirm which of the two
    # this name is meant to reference.
    name: error_target_1
    path: /error
|
2024-09-20 17:08:42 -07:00
|
|
|
|
2024-09-30 14:54:01 -07:00
|
|
|
# Sampling rate. Note: by default Arch works with OpenTelemetry-compatible
# tracing.
# NOTE(review): presumably a percentage (100 = sample everything) — confirm.
tracing: 100
|