Docs branch - v1 of our tech docs (#69)

* added the first set of docs for our technical docs * more documentation changes * added support for prompt processing and updated life of a request * updated docs to include getting help sections and updated life of a request * committing local changes for getting started guide, sample applications, and full reference spec for prompt-config * updated configuration reference, added sample app skeleton, updated favicon * fixed the configuration reference file, and made minor changes to the intent detection. commit v1 for now --------- Co-authored-by: Salman Paracha <salmanparacha@MacBook-Pro-261.local> Co-authored-by: Adil Hafeez <adil@katanemo.com>
2026-07-20 16:41:04 +02:00 · 2024-09-20 17:08:42 -07:00 · 2024-09-20 17:08:42 -07:00 · 80c554ce1a
commit 80c554ce1a
parent 233976a568
34 changed files with 1040 additions and 0 deletions
--- a/docs/source/_config/prompt-config-full-reference.yml
+++ b/docs/source/_config/prompt-config-full-reference.yml
@ -0,0 +1,78 @@
+version: "0.1-beta"
+
+listener:
+  address: 0.0.0.0  # or 127.0.0.1
+  port_value: 8080
+  messages: "hugging-face-messages-json" # Defines how Arch should parse the content of an HTTP request whose Content-Type is application/json or text/plain
+  common_tls_context: # If you configure port 443, you'll need to update the listener with your TLS certificates
+    tls_certificates:
+      - certificate_chain:
+          filename: "/etc/arch/certs/cert.pem"
+        private_key:
+          filename: "/etc/arch/certs/key.pem"
+
+system_prompts: # List of named system prompts; `content` is a literal block scalar, so its line breaks are kept as written
+  - name: "network_assistant"
+    content: |
+      You are a network assistant that just offers facts; not advice on manufacturers or purchasing decisions.
+
+# Centralized way to manage the LLM providers that the application has access to. Manage keys, retry logic, failover, and limits in one place.
+llm_providers:
+  - name: "OpenAI"
+    access_key: $OPENAI_API_KEY
+    model: "gpt-4o"  # OpenAI model id: ends with the letter "o", not the digit zero
+    default: true
+    stream: true
+    rate_limit:
+      selector: # Optional: rate limit based on HTTP headers such as JWT tokens or API keys
+        http-header:
+          name: "Authorization"
+          value: ""  # Empty value means each separate value has a separate limit
+      limit:
+        tokens: 100000  # Tokens per unit
+        unit: "minute"
+  - name: "Mistral"
+    access_key: $MISTRAL_API_KEY
+    model: "mistral-7B"
+
+prompt_endpoints: # Arch load-balances round-robin across these endpoints; this is managed via the cluster subsystem.
+  - "http://127.0.0.2" # No port given, so port 8000 is assumed; append an explicit port (e.g. :5000) to override
+  - "http://127.0.0.1:5000"
+
+prompt_guards:
+  input_guard: # Each entry names a guard and the on_exception action configured for it
+    - name: "jailbreak"
+      on_exception:
+        forward_to_error_target: true # NOTE(review): presumably hands off to the top-level error_target section - confirm
+      # Additional guard configurations can be added here
+    - name: "toxicity"
+      on_exception:
+        message: "Looks like you're curious about my abilities, but I can only provide assistance within my programmed parameters."
+
+prompt_targets:
+  - name: "information_extraction"
+    type: "default"
+    description: "This prompt handles all scenarios that are question and answer in nature. Like summarization, information extraction, etc."
+    path: "/agent/summary"
+    auto-llm-dispatch-on-response: true # Arch uses the default LLM and treats the response from the endpoint as the prompt to send to the LLM
+
+  - name: "reboot_network_device"
+    path: "/agent/action"
+    description: "Helps network operators perform device operations like rebooting a device."
+    parameters:
+      - name: "device_id"
+        type: "string" # other type options include: integer | float | list | dictionary | set
+        description: "Identifier of the network device to reboot."
+        default_value: ""
+        required: true
+      - name: "confirmation"
+        type: "integer" # other type options include: string | float | list | dictionary | set
+        description: "Confirmation flag to proceed with reboot."
+        required: true
+
+error_target: # NOTE(review): presumably the destination used when a prompt guard sets forward_to_error_target - confirm
+  name: "error_handler"
+  path: "/errors"
+
+intent-detection-threshold-override: 0.60 # By default Arch uses an NLI + embedding approach to match an incoming prompt to a prompt target.
+                                          # The intent matching threshold is kept at 0.80; you can override this behavior if you would like.