Doc Update (#129)

* init update

* Update terminology.rst

* fix the branch to create an index.html, and fix pre-commit issues

* Doc update

* made several changes to the docs after Shuguang's revision

* fixing pre-commit issues

* fixed the reference file to the final prompt config file

* added google analytics

---------

Co-authored-by: Salman Paracha <salmanparacha@MacBook-Pro-261.local>
This commit is contained in:
Shuguang Chen 2024-10-06 16:54:34 -07:00, committed by GitHub
parent 2a7b95582c
commit 5c7567584d
GPG key ID: B5690EEEBB952194 (no known key found for this signature in database)
49 changed files with 1185 additions and 609 deletions


@ -1,2 +1,5 @@
sphinx_book_theme==1.1.3
sphinx_copybutton==0.5.2
sphinxawesome-theme
sphinx_sitemap
sphinx_design
sphinxawesome_theme


@ -1,41 +0,0 @@
version: "0.1-beta"
listener:
  address: 127.0.0.1 | 0.0.0.0
  port_value: 8080 # If you configure port 443, you'll need to update the listener with tls_certificates
  messages: tuple | hugging-face-messages-api
system_prompts:
  - name: network_assistant
    content: You are a network assistant that just offers facts about the operational health of the network
llm_providers:
  - name: "OpenAI"
    access_key: $OPEN_AI_KEY
    model: gpt-4o
    default: true
  - name: "Mistral"
    access_key: $MISTRAL_KEY
    model: mixtral8-7B
prompt_endpoints:
  - "http://127.0.0.2"
  - "http://127.0.0.1"
prompt_guards:
  input-guard:
    - name: #jailbreak
      on-exception-message: Looks like you are curious about my abilities. But I can only
prompt_targets:
  - name: information_extraction
    type: RAG
    description: this prompt handles all information extractions scenarios
    path: /agent/summary
  - name: reboot_network_device
    path: /agent/action
    description: used to help network operators with perform device operations like rebooting a device.
    parameters:
error_target: # handle errors from Bolt or upstream LLMs
  name: “error_handler”
  path: /errors


@ -1,72 +0,0 @@
from flask import Flask, request, jsonify

app = Flask(__name__)


@app.route('/agent/device_reboot', methods=['POST'])
def reboot_devices():
    """
    Endpoint to reboot devices based on device IDs or a device group.
    """
    data = request.get_json()

    # Extract parameters based on the prompt targets definition
    device_ids = data.get('device_ids')
    device_group = data.get('device_group')

    # Validate that at least one parameter is provided
    if not device_ids and not device_group:
        return jsonify({'error': "At least one of 'device_ids' or 'device_group' must be provided."}), 400

    devices_to_reboot = []

    # Process 'device_ids' if provided
    if device_ids:
        if not isinstance(device_ids, list):
            return jsonify({'error': "'device_ids' must be a list."}), 400
        devices_to_reboot.extend(device_ids)

    # Process 'device_group' if provided
    if device_group:
        if not isinstance(device_group, str):
            return jsonify({'error': "'device_group' must be a string."}), 400
        # Simulate retrieving device IDs from the device group
        # In a real application, replace this with actual data retrieval
        group_devices = get_devices_by_group(device_group)
        if not group_devices:
            return jsonify({'error': f"No devices found in group '{device_group}'."}), 404
        devices_to_reboot.extend(group_devices)

    # Remove duplicates in case of overlap between device_ids and device_group
    devices_to_reboot = list(set(devices_to_reboot))

    # Simulate rebooting devices
    reboot_results = []
    for device_id in devices_to_reboot:
        # Placeholder for actual reboot logic
        result = {
            'device_id': device_id,
            'status': 'Reboot initiated'
        }
        reboot_results.append(result)

    response = {
        'reboot_results': reboot_results
    }
    return jsonify(response), 200


def get_devices_by_group(group_name):
    """
    Simulate retrieving device IDs based on a device group name.
    In a real application, this would query a database or external service.
    """
    # Placeholder data for demonstration purposes
    device_groups = {
        'Sales': ['1001', '1002', '1003'],
        'Engineering': ['2001', '2002', '2003'],
        'Data Center': ['3001', '3002', '3003']
    }
    return device_groups.get(group_name, [])


if __name__ == '__main__':
    app.run(debug=True)


@ -1,6 +0,0 @@
{
  "user_id": "user123",
  "messages": [
    {"role": "user", "content": "Tell me a joke."}
  ]
}


@ -1,12 +0,0 @@
{
  "user_id": "user123",
  "messages": [
    {
      "uuid": "550e8400-e29b-41d4-a716-446655440000",
      "timestamp": "2023-10-05T12:34:56.789123",
      "role": "user",
      "content": "Tell me a joke.",
      "intent_changed": true
    }
  ]
}
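When Arch flags an intent change, as in the enriched message above, a RAG application typically keeps only the messages from the new intent onward. A minimal sketch of that filtering (the helper below is illustrative, not part of Arch):

```python
def messages_since_last_intent_change(messages):
    """Return messages from the most recent message flagged with
    intent_changed=True onward (all messages when none is flagged)."""
    start = 0
    for i, msg in enumerate(messages):
        if msg.get("intent_changed"):
            start = i
    return messages[start:]

history = [
    {"role": "user", "content": "Summarize device 1001's health."},
    {"role": "assistant", "content": "Device 1001 is healthy."},
    {"role": "user", "content": "Tell me a joke.", "intent_changed": True},
]
print(messages_since_last_intent_change(history))  # only the joke request survives
```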


@ -1,5 +0,0 @@
@import url("theme.css");

body {
  font-size: 1em;
}


@ -0,0 +1,12 @@
<!-- _templates/analytics.html -->
{% if google_analytics_id %}
<!-- Google tag (gtag.js) -->
<script async src="https://www.googletagmanager.com/gtag/js?id={{ google_analytics_id }}"></script>
<script>
window.dataLayer = window.dataLayer || [];
function gtag(){dataLayer.push(arguments);}
gtag('js', new Date());
gtag('config', '{{ google_analytics_id }}');
</script>
{% endif %}


@ -1,6 +1,6 @@
.. _arch_function_calling_agentic_guide:
.. _arch_agent_guide:
Agentic (Text-to-Action) Apps
Agentic Workflow
==============================
Arch helps you easily personalize your applications by calling application-specific (API) functions
@ -11,11 +11,10 @@ claims to creating ad campaigns - via prompts.
Arch analyzes prompts, extracts critical information from prompts, engages in lightweight conversation with
the user to gather any missing parameters and makes API calls so that you can focus on writing business logic.
Arch does this via its purpose-built :ref:`Arch-FC LLM <llms_in_arch>` - the fastest (200ms p90 - 10x faser than GPT-4o)
Arch does this via its purpose-built :ref:`Arch-FC LLM <function_calling>` - the fastest (200ms p90 - 10x faster than GPT-4o)
and cheapest (100x cheaper than GPT-4o) function-calling LLM that matches performance with frontier models.
______________________________________________________________________________________________
.. image:: /_static/img/function-calling-network-flow.jpg
.. image:: includes/agent/function-calling-flow.jpg
:width: 100%
:align: center
@ -29,7 +28,7 @@ is how you would go about enabling this scenario with Arch:
Step 1: Define prompt targets with functions
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
.. literalinclude:: /_config/function-calling-network-agent.yml
.. literalinclude:: includes/agent/function-calling-agent.yaml
:language: yaml
:linenos:
:emphasize-lines: 16-37
@ -40,10 +39,10 @@ Step 2: Process request parameters in Flask
Once the prompt targets are configured as above, handling those parameters is straightforward:
.. literalinclude:: /_include/parameter_handling_flask.py
.. literalinclude:: includes/agent/parameter_handling.py
:language: python
:linenos:
:caption: Flask API example for parameter extraction via HTTP request parameters
:caption: Parameter handling with Flask
Parallel/ Multiple Function Calling
-----------------------------------
@ -64,7 +63,7 @@ the user's intent.
Example of Multiple Prompt Targets in YAML:
.. literalinclude:: /_config/function-calling-network-agent.yml
.. literalinclude:: includes/agent/function-calling-agent.yaml
:language: yaml
:linenos:
:emphasize-lines: 16-37


@ -3,15 +3,15 @@ listen:
address: 127.0.0.1 | 0.0.0.0
port_value: 8080 #If you configure port 443, you'll need to update the listener with tls_certificates
system_prompts:
- name: network_assistant
content: You are a network assistant that just offers facts about the operational health of the network
system_prompt: |
You are a network assistant that just offers facts; not advice on manufacturers or purchasing decisions.
llm_providers:
- name: "OpenAI"
access_key: $OPEN_AI_KEY
model: gpt-4o
default: true
- name: "OpenAI"
provider: "openai"
access_key: $OPENAI_API_KEY
model: gpt-4o
stream: true
prompt_targets:
- name: reboot_devices
@ -36,6 +36,12 @@ prompt_targets:
description: "The name of the device group to reboot."
required: false
prompt_endpoints:
- "http://127.0.0.2"
- "http://127.0.0.1"
# Arch performs round-robin load balancing across endpoints, managed via the cluster subsystem.
endpoints:
app_server:
# value could be ip address or a hostname with port
# this could also be a list of endpoints for load balancing
# for example endpoint: [ ip1:port, ip2:port ]
endpoint: "127.0.0.1:80"
# max time to wait for a connection to be established
connect_timeout: 0.005s
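The round-robin behavior noted in the comment above can be pictured in a few lines of Python (an illustration of the idea only; the endpoint addresses are made up, and Arch's actual load balancing lives in Envoy's cluster subsystem):

```python
import itertools

# A hypothetical endpoint list, as in `endpoint: [ ip1:port, ip2:port ]`
endpoints = ["10.0.0.1:80", "10.0.0.2:80"]
next_endpoint = itertools.cycle(endpoints).__next__

# Successive requests alternate between the two upstreams
picks = [next_endpoint() for _ in range(4)]
print(picks)  # ['10.0.0.1:80', '10.0.0.2:80', '10.0.0.1:80', '10.0.0.2:80']
```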

(binary image diff: before and after each 297 KiB)

@ -0,0 +1,41 @@
from flask import Flask, request, jsonify

app = Flask(__name__)


@app.route('/agent/device_summary', methods=['POST'])
def get_device_summary():
    """
    Endpoint to retrieve device statistics based on device IDs and an optional time range.
    """
    data = request.get_json()

    # Validate 'device_ids' parameter
    device_ids = data.get('device_ids')
    if not device_ids or not isinstance(device_ids, list):
        return jsonify({'error': "'device_ids' parameter is required and must be a list"}), 400

    # Validate 'time_range' parameter (optional, defaults to 7)
    time_range = data.get('time_range', 7)
    if not isinstance(time_range, int):
        return jsonify({'error': "'time_range' must be an integer"}), 400

    # Simulate retrieving statistics for the given device IDs and time range
    # In a real application, you would query your database or external service here
    statistics = []
    for device_id in device_ids:
        # Placeholder for actual data retrieval
        stats = {
            'device_id': device_id,
            'time_range': f'Last {time_range} days',
            'data': f'Statistics data for device {device_id} over the last {time_range} days.'
        }
        statistics.append(stats)

    response = {
        'statistics': statistics
    }
    return jsonify(response), 200


if __name__ == '__main__':
    app.run(debug=True)


@ -1,18 +1,3 @@
version: "0.1-beta"
listener:
address: 127.0.0.1 | 0.0.0.0
port_value: 8080 #If you configure port 443, you'll need to update the listener with tls_certificates
system_prompts:
- name: network_assistant
content: You are a network assistant that just offers facts about the operational health of the network
llm_providers:
- name: "OpenAI"
access_key: $OPEN_AI_KEY
model: gpt-4o
default: true
prompt_targets:
- name: get_device_statistics
description: >
@ -34,7 +19,3 @@ prompt_targets:
description: "The number of days in the past over which to retrieve device statistics. Defaults to 7 days if not specified."
required: false
default: 7
prompt_endpoints:
- "http://127.0.0.2"
- "http://127.0.0.1"


@ -1,7 +1,7 @@
.. _arch_rag_guide:
Retrieval-Augmented (RAG)
=========================
RAG Application
===============
The following section describes how Arch can help you build faster, smarter and more accurate
Retrieval-Augmented Generation (RAG) applications.
@ -21,14 +21,13 @@ Arch's intent-drift detection mechanism is based on its *prompt_targets* primitive
prompt to one of the *prompt_targets* configured in the gateway. Once it detects that the user has moved away from an
active intent, Arch adds the ``x-arch-intent-drift`` headers to the request before sending it to your application servers.
.. literalinclude:: /_include/intent_detection.py
.. literalinclude:: includes/rag/intent_detection.py
:language: python
:linenos:
:lines: 95-125
:emphasize-lines: 14-22
:caption: :download:`Intent drift detection in python </_include/intent_detection.py>`
:caption: Intent Detection Example
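Application servers can key off the ``x-arch-intent-drift`` header described above before deciding which conversation history to retrieve over. A minimal sketch of that check (the accepted truthy values here are assumptions, not documented Arch behavior):

```python
def should_reset_context(headers):
    """Return True when Arch signals that the user's intent has drifted,
    meaning earlier retrieval context is likely stale for this prompt."""
    return headers.get("x-arch-intent-drift", "").lower() in ("true", "1", "yes")

# Arch adds the header before forwarding the request to the app server
print(should_reset_context({"x-arch-intent-drift": "true"}))  # True
print(should_reset_context({}))                               # False
```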
_____________________________________________________________________________________________________________________
.. Note::
@ -38,26 +37,26 @@ ________________________________________________________________________________
so that you can use the most relevant prompts for your retrieval and for prompting upstream LLMs.
Step 1: define ConversationBufferMemory
Step 1: Define ConversationBufferMemory
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
.. literalinclude:: /_include/intent_detection.py
.. literalinclude:: includes/rag/intent_detection.py
:language: python
:linenos:
:lines: 1-21
Step 2: update ConversationBufferMemory w/ intent
Step 2: Update ConversationBufferMemory w/ intent
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
.. literalinclude:: /_include/intent_detection.py
.. literalinclude:: includes/rag/intent_detection.py
:language: python
:linenos:
:lines: 22-62
Step 3: get Messages based on latest drift
Step 3: Get Messages based on latest drift
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
.. literalinclude:: /_include/intent_detection.py
.. literalinclude:: includes/rag/intent_detection.py
:language: python
:linenos:
:lines: 64-76
@ -79,18 +78,17 @@ streamline data retrieval and processing to build more efficient and precise RAG
Step 1: Define prompt targets with parameter definitions
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
.. literalinclude:: /_config/rag-prompt-targets.yml
.. literalinclude:: includes/rag/prompt_targets.yaml
:language: yaml
:caption: Prompt Targets
:linenos:
:emphasize-lines: 16-36
:caption: prompt-config.yaml for parameter extraction for RAG scenarios
Step 2: Process request parameters in Flask
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
Once the prompt targets are configured as above, handling those parameters is straightforward:
.. literalinclude:: /_include/parameter_handling_flask.py
.. literalinclude:: includes/rag/parameter_handling.py
:language: python
:caption: Parameter handling with Flask
:linenos:
:caption: Flask API example for parameter extraction via HTTP request parameters


@ -0,0 +1,71 @@
version: "0.1-beta"

listener:
  address: 0.0.0.0 # or 127.0.0.1
  port: 10000
  # Defines how Arch should parse the content from application/json or text/plain Content-Type in the HTTP request
  message_format: huggingface

# Centralized way to manage LLMs, keys, retry logic, failover and limits
llm_providers:
  - name: "OpenAI"
    provider: "openai"
    access_key: $OPENAI_API_KEY
    model: gpt-4o
    default: true
    stream: true

# default system prompt used by all prompt targets
system_prompt: |
  You are a network assistant that just offers facts; not advice on manufacturers or purchasing decisions.

prompt_guards:
  input_guards:
    jailbreak:
      on_exception:
        message: "Looks like you're curious about my abilities, but I can only provide assistance within my programmed parameters."

prompt_targets:
  - name: "reboot_network_device"
    description: "Helps network operators perform device operations like rebooting a device."
    endpoint:
      name: app_server
      path: "/agent/action"
    parameters:
      - name: "device_id"
        # additional type options include: int | float | bool | string | list | dict
        type: "string"
        description: "Identifier of the network device to reboot."
        required: true
      - name: "confirmation"
        type: "string"
        description: "Confirmation flag to proceed with reboot."
        default: "no"
        enum: ["yes", "no"]
  - name: "information_extraction"
    default: true
    description: "This prompt handles all scenarios that are question and answer in nature. Like summarization, information extraction, etc."
    endpoint:
      name: app_server
      path: "/agent/summary"
    # Arch uses the default LLM and treats the response from the endpoint as the prompt to send to the LLM
    auto_llm_dispatch_on_response: true
    # override system prompt for this prompt target
    system_prompt: |
      You are a helpful information extraction assistant. Use the information that is provided to you.

error_target:
  endpoint:
    name: error_target_1
    path: /error

# Arch performs round-robin load balancing across endpoints, managed via the cluster subsystem.
endpoints:
  app_server:
    # value could be an ip address or a hostname with port
    # this could also be a list of endpoints for load balancing
    # for example endpoint: [ ip1:port, ip2:port ]
    endpoint: "127.0.0.1:80"
    # max time to wait for a connection to be established
    connect_timeout: 0.005s


@ -1,28 +1,29 @@
.. _llm_providers:
.. _llm_provider:
LLM Provider
------------
============
``llm_provider`` is a top-level primitive in Arch, helping developers centrally define, secure, observe,
and manage the usage of their LLMs. Arch builds on Envoy's reliable `cluster subsystem <https://www.envoyproxy.io/docs/envoy/v1.31.2/intro/arch_overview/upstream/cluster_manager>`_
to manage egress traffic to LLMs, which includes intelligent routing, retry and fail-over mechanisms,
ensuring high availability and fault tolerance. This abstraction also enables developers to seamlessly switching between LLM providers or upgrade LLM versions, simplifying the integration and scaling of LLMs across
applications.
ensuring high availability and fault tolerance. This abstraction also enables developers to seamlessly
switch between LLM providers or upgrade LLM versions, simplifying the integration and scaling of LLMs
across applications.
Below is an example of how you can configure ``llm_providers`` with an instance of an Arch gateway.
.. literalinclude:: /_config/getting-started.yml
.. literalinclude:: includes/arch_config.yaml
:language: yaml
:linenos:
:lines: 1-20
:emphasize-lines: 11-18
:caption: :download:`arch-getting-started.yml </_config/getting-started.yml>`
:emphasize-lines: 10-16
:caption: Example Configuration
.. Note::
When you start Arch, it creates a listener port for egress traffic based on the presence of ``llm_providers``
configuration section in the ``prompt_config.yml`` file. Arch binds itself to a local address such as
``127.0.0.1:9000/v1`` or a DNS-based address like ``arch.local:9000/v1`` for egress traffic.
``127.0.0.1:51001/v1``.
Arch also offers vendor-agnostic SDKs and libraries to make LLM calls to API-based LLM providers (like OpenAI,
Anthropic, Mistral, Cohere, etc.) and supports calls to OSS LLMs that are hosted on your infrastructure. Arch
@ -31,18 +32,18 @@ calls, handling retries, managing rate limits, and ensuring seamless integration
LLMs. Simply configure the details of the LLMs your application will use, and Arch offers a unified interface to
make outbound LLM calls.
Example: Using the Arch Python SDK
----------------------------------
Example: Using the OpenAI Python SDK
------------------------------------
.. code-block:: python
from arch_client import ArchClient
from openai import OpenAI
# Initialize the Arch client
client = ArchClient(base_url="http://127.0.0.1:9000/v1")
client = OpenAI(base_url="http://127.0.0.1:51001/v1")
# Define your LLM provider and prompt
model_id = "openai"
llm_provider = "openai"
prompt = "What is the capital of France?"
# Send the prompt to the LLM through Arch


@ -0,0 +1,126 @@
Prompt Target
==============
**Prompt Targets** are a fundamental component of Arch, enabling developers to define how different types of user prompts are processed and routed within their generative AI applications.
This section provides an in-depth look at prompt targets, including their purpose, configuration, usage, and best practices to help you effectively leverage this feature in your projects.
What Are Prompt Targets?
------------------------
Prompt targets are predefined endpoints within Arch that handle specific types of user prompts.
They act as the bridge between user inputs and your backend services or APIs, enabling Arch to route, process, and manage prompts efficiently.
By defining prompt targets, you can separate your application's business logic from the complexities of prompt processing, ensuring a cleaner and more maintainable codebase.
.. table::
   :width: 100%

   ==================== ============================================
   **Capability**       **Description**
   ==================== ============================================
   Intent Recognition   Identify the purpose of a user prompt.
   Parameter Extraction Extract necessary data from the prompt.
   API Invocation       Call relevant backend services or functions.
   Response Handling    Process and return responses to the user.
   ==================== ============================================
Key Features
~~~~~~~~~~~~
Below are the key features of prompt targets that empower developers to build efficient, scalable, and personalized GenAI solutions:
- **Modular Design**: Define multiple prompt targets to handle diverse functionalities.
- **Parameter Management**: Specify required and optional parameters for each target.
- **Function Integration**: Seamlessly connect prompts to backend APIs or functions.
- **Error Handling**: Direct errors to designated handlers for streamlined troubleshooting.
- **Metadata Enrichment**: Attach additional context to prompts for enhanced processing.
Configuring Prompt Targets
--------------------------
Configuring prompt targets involves defining them in Arch's configuration file.
Each prompt target specifies how a particular type of prompt should be handled, including the endpoint to invoke and any parameters required.
Basic Configuration
~~~~~~~~~~~~~~~~~~~
A prompt target configuration includes the following elements:
.. vale Vale.Spelling = NO
- ``name``: A unique identifier for the prompt target.
- ``description``: A brief explanation of what the prompt target does.
- ``endpoint``: The API endpoint or function that handles the prompt.
- ``parameters`` (Optional): A list of parameters to extract from the prompt.
Defining Parameters
~~~~~~~~~~~~~~~~~~~
Parameters are the pieces of information that Arch needs to extract from the user's prompt to perform the desired action.
Each parameter can be marked as required or optional.
Here is a full list of parameter attributes that Arch can support:
.. table::
   :width: 100%

   ==================== ============================================================================
   **Attribute**        **Description**
   ==================== ============================================================================
   ``name``             Specifies the identifier of the parameter.
   ``type``             Specifies the data type of the parameter.
   ``description``      Provides a human-readable explanation of the parameter's purpose.
   ``required``         Indicates whether the parameter is mandatory or optional.
   ``default``          Specifies a default value for the parameter if not provided by the user.
   ``items``            Used in the context of arrays to define the schema of items within an array.
   ``format``           Specifies a format for the parameter value, e.g., date and email.
   ``enum``             Lists the allowable values for the parameter.
   ``minimum``          Defines the minimum acceptable value for numeric parameters.
   ``maximum``          Specifies the maximum acceptable value for numeric parameters.
   ==================== ============================================================================
Example Configuration
~~~~~~~~~~~~~~~~~~~~~
.. code-block:: yaml

   prompt_targets:
     - name: get_weather
       description: Get the current weather for a location
       parameters:
         - name: location
           description: The city and state, e.g. San Francisco, New York
           type: string
           required: true
         - name: unit
           description: The unit of temperature to return
           type: string
           enum: ["celsius", "fahrenheit"]
       endpoint:
         name: api_server
         path: /weather
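The attribute table above translates naturally into a validation pass over extracted parameters. A minimal sketch under assumed semantics for ``required``, ``default``, and ``enum`` (this is not Arch's actual validator, just an illustration of the contract):

```python
def validate_params(spec, extracted):
    """Apply required/default/enum checks from a prompt-target parameter spec."""
    result = dict(extracted)
    errors = []
    for p in spec:
        name = p["name"]
        if name not in result:
            if "default" in p:
                result[name] = p["default"]       # fill in the declared default
            elif p.get("required"):
                errors.append(f"missing required parameter: {name}")
            continue
        if "enum" in p and result[name] not in p["enum"]:
            errors.append(f"{name} must be one of {p['enum']}")
    return result, errors

spec = [
    {"name": "location", "required": True},
    {"name": "unit", "enum": ["celsius", "fahrenheit"], "default": "celsius"},
]
print(validate_params(spec, {"location": "San Francisco"}))
# → ({'location': 'San Francisco', 'unit': 'celsius'}, [])
```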
Routing Logic
-------------
Prompt targets determine where and how user prompts are processed.
Arch uses intelligent routing logic to ensure that prompts are directed to the appropriate targets based on their intent and context.
Default Targets
~~~~~~~~~~~~~~~
For general-purpose prompts that do not match any specific prompt target, Arch routes them to a designated default target.
This is useful for handling open-ended queries like document summarization or information extraction.
Intent Matching
~~~~~~~~~~~~~~~
Arch analyzes the user's prompt to determine its intent and matches it with the most suitable prompt target based on the name and description defined in the configuration.
For example:
.. code-block:: bash

   Prompt: "Can you reboot the router?"
   Matching Target: reboot_device (based on description matching "reboot devices")
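A toy version of this description-based matching scores each target by word overlap between the prompt and its description, then routes to the best scorer (a naive sketch for intuition only; Arch uses its purpose-built LLM for intent matching, not keyword overlap):

```python
def match_target(prompt, targets):
    """Pick the prompt target whose description shares the most words
    with the prompt (case-insensitive); None when nothing overlaps."""
    words = set(prompt.lower().split())

    def score(target):
        return len(words & set(target["description"].lower().split()))

    best = max(targets, key=score)
    return best["name"] if score(best) > 0 else None

targets = [
    {"name": "reboot_device", "description": "reboot devices on the network"},
    {"name": "get_statistics", "description": "retrieve device statistics"},
]
print(match_target("Can you reboot the router?", targets))  # reboot_device
```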
Summary
--------
Prompt targets are essential for defining how user prompts are handled within your generative AI applications using Arch.
By carefully configuring prompt targets, you can ensure that prompts are accurately routed, necessary parameters are extracted, and backend services are invoked seamlessly.
This modular approach not only simplifies your application's architecture but also enhances scalability, maintainability, and overall user experience.


@ -22,16 +22,16 @@ Upstream (Egress)
Arch automatically configures a listener to route requests from your application to upstream LLM API providers (or hosts).
When you start Arch, it creates a listener for egress traffic based on the presence of the ``llm_providers`` configuration
section in the ``prompt_config.yml`` file. Arch binds itself to a local address such as ``127.0.0.1:9000/v1`` or a DNS-based
address like ``arch.local:9000/v1`` for outgoing traffic. For more details on LLM providers, read :ref:`here <llm_providers>`
address like ``arch.local:9000/v1`` for outgoing traffic. For more details on LLM providers, read :ref:`here <llm_provider>`
Configure Listener
^^^^^^^^^^^^^^^^^^
To configure a Downstream (Ingress) Listener, simply add the ``listener`` directive to your ``prompt_config.yml`` file:
.. literalinclude:: /_config/getting-started.yml
.. literalinclude:: ../includes/arch_config.yaml
:language: yaml
:linenos:
:lines: 1-18
:emphasize-lines: 2-5
:caption: :download:`arch-getting-started.yml </_config/getting-started.yml>`
:caption: Example Configuration


@ -7,7 +7,7 @@ Arch is a set of **two** self-contained processes that are designed to run along
servers (or on a separate host connected via a network). The first process is designated to manage low-level
networking and HTTP related concerns, and the other process is for **model serving**, which helps Arch make
intelligent decisions about the incoming prompts. The model server is designed to call the purpose-built
:ref:`LLMs <llms_in_arch>` in Arch.
LLMs in Arch.
.. image:: /_static/img/arch-system-architecture.jpg
:align: center
@ -26,9 +26,9 @@ The following bash commands enable you to configure the model server subsystem i
and only use CPU devices. This will be the slowest option but can be useful in dev/test scenarios where GPUs
might not be available.
.. code-block:: bash
.. code-block:: console
archgw up --local -cpu
$ archgw up --local-cpu
Local Serving (GPU- Fast)
^^^^^^^^^^^^^^^^^^^^^^^^^
@ -36,9 +36,9 @@ The following bash commands enable you to configure the model server subsystem i
machine and utilize the GPU available for fast inference across all model use cases, including function calling
guardrails, etc.
.. code-block:: bash
.. code-block:: console
archgw up --local
$ archgw up --local
Cloud Serving (GPU - Blazing Fast)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
@ -46,11 +46,11 @@ The command below instructs Arch to intelligently use GPUs locally for fast inte
cloud serving for function calling and guardrails scenarios to dramatically improve the speed and overall performance
of your applications.
.. code-block:: bash
.. code-block:: console
archgw up
$ archgw up
.. Note::
Arch's model serving in the cloud is priced at $0.05 per 1M tokens (156x cheaper than GPT-4o) with average latency
of 200ms (10x faster than GPT-4o). Please refer to our :ref:`getting started guide <getting_started>` to know
of 200ms (10x faster than GPT-4o). Please refer to our :ref:`Get Started <quickstart>` to know
how to generate API keys for model serving.


@ -1,11 +1,11 @@
.. _arch_overview_prompt_handling:
Prompts
-------
Prompt
=================
Arch's primary design point is to securely accept, process and handle prompts. To do that effectively,
Arch relies on Envoy's HTTP `connection management <https://www.envoyproxy.io/docs/envoy/v1.31.2/intro/arch_overview/http/http_connection_management>`_,
subsystem and its **prompt handler** subsystem engineered with purpose-built :ref:`LLMs <llms_in_arch>` to
subsystem and its **prompt handler** subsystem engineered with purpose-built LLMs to
implement critical functionality on behalf of developers so that you can stay focused on business logic.
.. Note::
@ -27,17 +27,18 @@ containing two key-value pairs:
Prompt Guardrails
-----------------
Arch is engineered with :ref:`Arch-Guard <llms_in_arch>`, an industry leading safety layer, powered by a
Arch is engineered with :ref:`Arch-Guard <prompt_guard>`, an industry leading safety layer, powered by a
compact and high-performing LLM that monitors incoming prompts to detect and reject jailbreak attempts -
ensuring that unauthorized or harmful behaviors are intercepted early in the process.
To add jailbreak guardrails, see example below:
.. literalinclude:: /_config/getting-started.yml
.. literalinclude:: ../includes/arch_config.yaml
:language: yaml
:linenos:
:emphasize-lines: 24-27
:caption: :download:`arch-getting-started.yml </_config/getting-started.yml>`
:lines: 1-45
:emphasize-lines: 22-26
:caption: Example Configuration
.. Note::
As a roadmap item, Arch will expose the ability for developers to define custom guardrails via Arch-Guard-v2,
@ -49,17 +50,17 @@ Prompt Targets
--------------
Once a prompt passes any configured guardrail checks, Arch processes the contents of the incoming conversation
and identifies where to forwad the conversation to via its essential ``prompt_targets`` primitve. Prompt targets
are endpoints that receive prompts that are processed by Arch. For example, Arch enriches incoming prompts with
metadata like knowing when a user's intent has changed so that you can build faster, more accurate RAG apps.
and identifies where to forward the conversation to via its ``prompt_targets`` primitive. Prompt targets are endpoints
that receive prompts that are processed by Arch. For example, Arch enriches incoming prompts with metadata like knowing
when a user's intent has changed so that you can build faster, more accurate RAG apps.
Configuring ``prompt_targets`` is simple. See example below:
.. literalinclude:: /_config/getting-started.yml
.. literalinclude:: ../includes/arch_config.yaml
:language: yaml
:linenos:
:emphasize-lines: 29-38
:caption: :download:`arch-getting-started.yml </_config/getting-started.yml>`
:caption: Example Configuration
Intent Detection and Prompt Matching:
@ -83,12 +84,12 @@ Agentic Apps via Prompt Targets
To support agentic apps, like scheduling travel plans or sharing comments on a document - via prompts, Arch uses
its function calling abilities to extract critical information from the incoming prompt (or a set of prompts)
needed by a downstream backend API or function call before calling it directly. For more details on how you can
build agentic applications using Arch, see our full guide :ref:`here <arch_function_calling_agentic_guide>`:
build agentic applications using Arch, see our full guide :ref:`here <arch_agent_guide>`:
.. Note::
Arch :ref:`Arch-FC <llms_in_arch>` is the dedicated agentic model engineered in Arch to extract information from
Arch :ref:`Arch-Function <function_calling>` is the dedicated agentic model engineered in Arch to extract information from
a (set of) prompts and executes necessary backend API calls. This allows for efficient handling of agentic tasks,
such as scheduling data retrieval, by dynamically interacting with backend services. Arch-FC is a flagship 1.3
such as scheduling data retrieval, by dynamically interacting with backend services. Arch-Function is a flagship 1.3
billion parameter model that matches performance with frontier models like Claude Sonnet 3.5 and GPT-4, while
being 100x cheaper ($0.05 per 1M tokens, hosted) and 10x faster (p50 latencies of 200ms).
@ -97,34 +98,14 @@ Prompting LLMs
Arch is a single piece of software that is designed to manage both ingress and egress prompt traffic, drawing its
distributed proxy nature from the robust `Envoy <https://envoyproxy.io>`_. This makes it extremely efficient and capable
of handling upstream connections to LLMs. If your application is originating code to an API-based LLM, simply use
Arch's Python or JavaScript client SDK to send traffic to the desired LLM of choice. By sending traffic through Arch,
you can propagate traces, manage and monitor traffic, apply rate limits, and utilize a large set of traffic management
capabilities in a central place.
the OpenAI client and configure it with Arch. By sending traffic through Arch, you can propagate traces, manage and monitor
traffic, apply rate limits, and utilize a large set of traffic management capabilities in a centralized way.
.. Attention::
When you start Arch, it automatically creates a listener port for egress calls to upstream LLMs. This is based on the
``llm_providers`` configuration section in the ``prompt_config.yml`` file. Arch binds itself to a local address such as
127.0.0.1:51001/v1.
Example: Using OpenAI Client with Arch as an Egress Gateway
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
@ -134,7 +115,7 @@ Example: Using OpenAI Client with Arch as an Egress Gateway
import openai
# Set the OpenAI API base URL to the Arch gateway endpoint
openai.api_base = "http://127.0.0.1:51001/v1"
# No need to set openai.api_key since it's configured in Arch's gateway
@ -148,8 +129,8 @@ Example: Using OpenAI Client with Arch as an Egress Gateway
In these examples:
The OpenAI client is used to send traffic directly through the Arch egress proxy to the LLM of your choice, such as OpenAI.
The OpenAI client is configured to route traffic via Arch by setting the proxy to 127.0.0.1:51001, assuming Arch is
running locally and bound to that address and port.
This setup allows you to take advantage of Arch's advanced traffic management features while interacting with LLM APIs like OpenAI.
View file
@ -1,9 +1,9 @@
.. _lifecycle_of_a_request:
Request Lifecycle
=================
Below we describe the events in the lifecycle of a request passing through an Arch gateway instance. We first
describe how Arch fits into the request path and then the internal events that take place following
the arrival of a request at Arch from downstream clients. We follow the request until the corresponding
dispatch upstream and the response path.
@ -48,7 +48,7 @@ Arch is a set of **two** self-contained processes that are designed to run along
(or on a separate server connected to your application servers via a network). The first process is designated
to manage HTTP-level networking and connection management concerns (protocol management, request id generation,
header sanitization, etc.), and the other process is for **model serving**, which helps Arch make intelligent
decisions about the incoming prompts. The model server hosts the purpose-built LLMs to
manage several critical, but undifferentiated, prompt related tasks on behalf of developers.
@ -62,7 +62,7 @@ The request processing path in Arch has three main parts:
hosted endpoint that implements domain-specific business logic for incoming prompts. This is where knowledge
of targets and endpoint health, load balancing and connection pooling exists.
* :ref:`Model serving subsystem <arch_model_serving>` which helps Arch make intelligent decisions about the
incoming prompts. The model server is designed to call the purpose-built LLMs in Arch.
The three subsystems are bridged by the HTTP router filter and the cluster manager subsystems of Envoy.
@ -80,7 +80,7 @@ Configuration
Today, Arch only supports a static bootstrap configuration file for simplicity:
.. literalinclude:: ../includes/arch_config.yaml
:language: yaml
@ -89,7 +89,7 @@ Request Flow (Ingress)
Overview
^^^^^^^^
A brief outline of the lifecycle of a request and response using the example configuration above:
1. **TCP Connection Establishment**:
A TCP connection from downstream is accepted by an Arch listener running on a worker thread.
@ -137,7 +137,7 @@ Request Flow (Egress)
Overview
--------
A brief outline of the lifecycle of a request and response in the context of egress traffic from an application
to Large Language Models (LLMs) via Arch:
1. **HTTP Connection Establishment to LLM**:
View file
@ -0,0 +1,14 @@
.. _tech_overview:
Tech Overview
=============
.. toctree::
:maxdepth: 2
terminology
threading_model
listener
model_serving
prompt
request_lifecycle
View file
@ -22,7 +22,7 @@ before forwarding them to your application server endpoints. Arch enables you to
When you start Arch, you specify a listener address/port that you want to bind downstream. But Arch uses a predefined port
that you can use (``127.0.0.1:10000``) to proxy egress calls originating from your application to LLMs (API-based or hosted).
For more details, check out :ref:`LLM providers <llm_provider>`.
**Instance**: An instance of the Arch gateway. When you start Arch it creates at most two processes. One to handle Layer 7
networking operations (auth, tls, observability, etc) and the second process to serve models that enable it to make smart
@ -43,4 +43,4 @@ and take appropriate actions.
**Model Serving**: Arch is a set of **two** self-contained processes that are designed to run alongside your application servers
(or on a separate host connected via a network). The **model serving** process helps Arch make intelligent decisions about the
incoming prompts. The model server is designed to call the (fast) purpose-built LLMs in Arch.
View file
@ -1,6 +1,6 @@
.. _arch_overview_threading:
Threading Model
===============
Arch builds on top of Envoy's single process with multiple threads architecture.
View file
@ -5,24 +5,35 @@
# -- Project information -----------------------------------------------------
# https://www.sphinx-doc.org/en/master/usage/configuration.html#project-information
from dataclasses import asdict
from sphinx.application import Sphinx
from sphinx.util.docfields import Field
from sphinxawesome_theme import ThemeOptions
from sphinxawesome_theme.postprocess import Icons
project = 'Arch Docs'
copyright = '2024, Katanemo Labs, Inc'
author = 'Katanemo Labs, Inc'
release = ' v0.1'
# -- General configuration ---------------------------------------------------
# https://www.sphinx-doc.org/en/master/usage/configuration.html#general-configuration
root_doc = 'index'
nitpicky = True
add_module_names = False
# -- General configuration ---------------------------------------------------
extensions = [
"sphinx.ext.autodoc",
"sphinx.ext.intersphinx",
"sphinx.ext.extlinks",
"sphinx.ext.viewcode",
"sphinx_sitemap",
"sphinx_design"
]
# Paths that contain templates, relative to this directory.
@ -32,22 +43,99 @@ templates_path = ['_templates']
# to ignore when looking for source files.
exclude_patterns = ['_build', 'Thumbs.db', '.DS_Store']
# -- Options for HTML output -------------------------------------------------
html_theme = 'sphinxawesome_theme' # You can change the theme to 'sphinx_rtd_theme' or another of your choice.
html_title = project + release
html_permalinks_icon = Icons.permalinks_icon
html_favicon = '_static/favicon.ico'
html_last_updated_fmt = ""
html_use_index = False # Don't create index
html_domain_indices = False # Don't need module indices
html_copy_source = False # Don't need sources
html_show_sphinx = False
# Specify the path to the logo image file (make sure the logo is in the _static directory)
html_logo = '_static/img/arch-nav-logo.png'
html_baseurl = './docs'
html_sidebars = {
"**": ['analytics.html', "sidebar_main_nav_links.html", "sidebar_toc.html", ]
}
theme_options = ThemeOptions(
show_breadcrumbs=True,
awesome_external_links=True,
extra_header_link_icons={
"repository on GitHub": {
"link": "https://github.com/katanemo/arch",
"icon": (
'<svg height="26px" style="margin-top:-2px;display:inline" '
'viewBox="0 0 45 44" '
'fill="currentColor" xmlns="http://www.w3.org/2000/svg">'
'<path fill-rule="evenodd" clip-rule="evenodd" '
'd="M22.477.927C10.485.927.76 10.65.76 22.647c0 9.596 6.223 17.736 '
"14.853 20.608 1.087.2 1.483-.47 1.483-1.047 "
"0-.516-.019-1.881-.03-3.693-6.04 "
"1.312-7.315-2.912-7.315-2.912-.988-2.51-2.412-3.178-2.412-3.178-1.972-1.346.149-1.32.149-1.32 " # noqa
"2.18.154 3.327 2.24 3.327 2.24 1.937 3.318 5.084 2.36 6.321 "
"1.803.197-1.403.759-2.36 "
"1.379-2.903-4.823-.548-9.894-2.412-9.894-10.734 "
"0-2.37.847-4.31 2.236-5.828-.224-.55-.969-2.759.214-5.748 0 0 "
"1.822-.584 5.972 2.226 "
"1.732-.482 3.59-.722 5.437-.732 1.845.01 3.703.25 5.437.732 "
"4.147-2.81 5.967-2.226 "
"5.967-2.226 1.185 2.99.44 5.198.217 5.748 1.392 1.517 2.232 3.457 "
"2.232 5.828 0 "
"8.344-5.078 10.18-9.916 10.717.779.67 1.474 1.996 1.474 4.021 0 "
"2.904-.027 5.247-.027 "
"5.96 0 .58.392 1.256 1.493 1.044C37.981 40.375 44.2 32.24 44.2 "
'22.647c0-11.996-9.726-21.72-21.722-21.72" '
'fill="currentColor"/></svg>'
),
},
},
)
html_theme_options = asdict(theme_options)
# Add any paths that contain custom static files (such as style sheets) here,
# relative to this directory. They are copied after the builtin static files,
# so a file named "default.css" will overwrite the builtin "default.css".
html_static_path = ['_static']
#html_style = 'css/arch.css'
pygments_style = "lovelace"
pygments_style_dark = "github-dark"
sitemap_url_scheme = "{link}"
# Add this configuration at the bottom of your conf.py
html_context = {
'google_analytics_id': 'G-K2LXXSX6HB', # Replace with your Google Analytics tracking ID
}
templates_path = ['_templates']
# -- Register a :confval: interpreted text role ----------------------------------
def setup(app: Sphinx) -> None:
"""Register the ``confval`` role and directive.
This allows theme options to be declared as their own objects
for styling and cross-referencing.
"""
app.add_object_type(
"confval",
"confval",
objname="configuration parameter",
doc_field_types=[
Field(
"default",
label="default",
has_arg=True,
names=("default",),
bodyrolename="class",
)
],
)
app.add_css_file('custom.css')  # path is relative to the static output dir, not the source tree
View file
@ -0,0 +1,2 @@
[restructuredtext parser]
syntax_highlight = short
View file
@ -0,0 +1,47 @@
version: "0.1-beta"
listen:
address: 127.0.0.1 | 0.0.0.0
port_value: 8080 #If you configure port 443, you'll need to update the listener with tls_certificates
system_prompt: |
You are a network assistant that just offers facts; not advice on manufacturers or purchasing decisions.
llm_providers:
- name: "OpenAI"
provider: "openai"
access_key: OPENAI_API_KEY
model: gpt-4o
stream: true
prompt_targets:
- name: reboot_devices
description: >
This prompt target handles user requests to reboot devices.
It ensures that when users request to reboot specific devices or device groups, the system processes the reboot commands accurately.
**Examples of user prompts:**
- "Please reboot device 12345."
- "Restart all devices in tenant group tenant-XYZ."
- "I need to reboot devices A, B, and C."
path: /agent/device_reboot
parameters:
- name: "device_ids"
type: list # Options: string | integer | float | list | dictionary | set
description: "A list of device identifiers (IDs) to reboot."
required: false
- name: "device_group"
type: string # Options: string | integer | float | list | dictionary | set
description: "The name of the device group to reboot."
required: false
# Arch performs round-robin load balancing between different endpoints, managed via the cluster subsystem.
endpoints:
app_server:
# value could be ip address or a hostname with port
# this could also be a list of endpoints for load balancing for example endpoint: [ ip1:port, ip2:port ]
endpoint: "127.0.0.1:80"
# max time to wait for a connection to be established
connect_timeout: 0.005s
View file
@ -1,9 +1,11 @@
.. _intro_to_arch:
Intro to Arch
=============
Arch is an intelligent `(Layer 7) <https://www.cloudflare.com/learning/ddos/what-is-layer-7/>`_ gateway
designed for generative AI apps, AI agents, and Co-pilots that work with prompts. Engineered with purpose-built
large language models (LLMs), Arch handles all the critical but undifferentiated tasks related to the handling and
processing of prompts, including detecting and rejecting `jailbreak <https://github.com/verazuo/jailbreak_llms>`_
attempts, intelligently calling “backend” APIs to fulfill the user's request represented in a prompt, routing to
and offering disaster recovery between upstream LLMs, and managing the observability of prompts and LLM interactions
@ -43,7 +45,7 @@ functionality exclusively for prompts and LLMs. This gives Arch several advantag
of deploying library upgrades in your applications.
**Engineered with Fast LLMs:** Arch is engineered with specialized (sub-billion) LLMs that are designed for
fast, cost-effective and accurate handling of prompts. These LLMs are designed to be
best-in-class for critical prompt-related tasks like:
* **Function/API Calling:** Arch helps you easily personalize your applications by enabling calls to
@ -59,32 +61,30 @@ best-in-class for critcal prompt-related tasks like:
attempts or toxicity present in user's prompts without having to write a single line of code. To learn more
about how to configure guardrails available in Arch, read :ref:`prompt processing <arch_overview_prompt_handling>`.
* **[Coming Soon] Intent-Markers:** Developers struggle to handle `follow-up <https://www.reddit.com/r/ChatGPTPromptGenius/comments/17dzmpy/how_to_use_rag_with_conversation_history_for/?>`_,
or `clarifying <https://www.reddit.com/r/LocalLLaMA/comments/18mqwg6/best_practice_for_rag_with_followup_chat/>`_
questions. Specifically, when users ask for modifications or additions to previous responses their AI applications
often generate entirely new responses instead of adjusting the previous ones. Arch offers intent-markers as a
feature so that developers know when the user has shifted away from the previous intent so that they can improve
their retrieval, lower overall token cost and dramatically improve the speed and accuracy of their responses back
to users. For more details, see :ref:`intent markers <arch_rag_guide>`.
**Traffic Management:** Arch offers several capabilities for LLM calls originating from your applications, including smart
retries on errors from upstream LLMs, and automatic cutover to other LLMs configured in Arch for continuous availability
and disaster recovery scenarios. Arch extends Envoy's `cluster subsystem <https://www.envoyproxy.io/docs/envoy/latest/intro/arch_overview/upstream/cluster_manager>`_
to manage upstream connections to LLMs so that you can build resilient AI applications.
**Front/edge Gateway:** There is substantial benefit in using the same software at the edge (observability,
traffic shaping algorithms, applying guardrails, etc.) as for outbound LLM inference use cases. Arch has the feature set
that makes it exceptionally well suited as an edge gateway for AI applications. This includes TLS termination, applying
guardrails early in the process, intelligent parameter gathering from prompts, and prompt-based routing to backend APIs.
**Best-In-Class Monitoring:** Arch offers several monitoring metrics that help you understand three critical aspects of
your application: latency, token usage, and error rates by an upstream LLM provider. Latency measures the speed at which
your application is responding to users, which includes metrics like time to first token (TFT), time per output token (TOT),
and the total latency as perceived by users.
**End-to-End Tracing:** Arch propagates trace context using the W3C Trace Context standard, specifically through the
``traceparent`` header. This allows each component in the system to record its part of the request flow, enabling **end-to-end tracing**
across the entire application. By using OpenTelemetry, Arch ensures that developers can capture this trace data consistently and
in a format compatible with various observability tools. For more details, read :ref:`tracing <arch_overview_tracing>`.
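For reference, a ``traceparent`` header carries four dash-separated, hex-encoded fields. The
sketch below parses one (the trace and span IDs are made up):

.. code-block:: python

   # W3C Trace Context header: version-traceid-parentid-traceflags
   traceparent = "00-0af7651916cd43dd8448eb211c80319c-b7ad6b7169203331-01"

   version, trace_id, parent_id, flags = traceparent.split("-")
   assert len(trace_id) == 32    # 16-byte trace id, hex-encoded
   assert len(parent_id) == 16   # 8-byte parent span id, hex-encoded
   sampled = bool(int(flags, 16) & 0x01)  # low bit of trace-flags = sampled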
View file
@ -0,0 +1,91 @@
Overview
============
Welcome to Arch, the intelligent prompt gateway designed to help developers build **fast**, **secure**, and **personalized** generative AI apps at ANY scale.
In this documentation, you will learn how to quickly set up Arch to trigger API calls via prompts, apply prompt guardrails without writing any application-level logic,
simplify the interaction with upstream LLMs, and improve observability, all while streamlining your application development process.
Get Started
-----------
This section introduces you to Arch and helps you get set up quickly:
.. grid:: 3
.. grid-item-card:: Overview
:link: overview.html
Overview of Arch and Doc navigation
.. grid-item-card:: Intro to Arch
:link: intro_to_arch.html
Explore Arch's features and developer workflow
.. grid-item-card:: Quickstart
:link: quickstart.html
Learn how to quickly set up and integrate Arch
Concepts
--------
Deep dive into essential ideas and mechanisms behind Arch:
.. grid:: 3
.. grid-item-card:: Tech Overview
:link: ../Concepts/tech_overview/tech_overview.html
Learn about the technology stack
.. grid-item-card:: LLM Provider
:link: ../Concepts/llm_provider.html
Explore Arch's LLM integration options
.. grid-item-card:: Targets
:link: ../Concepts/prompt_target.html
Understand how Arch handles prompts
Guides
------
Step-by-step tutorials for practical Arch use cases and scenarios:
.. grid:: 3
.. grid-item-card:: Prompt Guard
:link: ../guides/prompt_guard.html
Instructions on securing and validating prompts
.. grid-item-card:: Function Calling
:link: ../guides/function_calling.html
A guide to effective function calling
.. grid-item-card:: Observability
:link: ../guides/observability.html
Learn to monitor and troubleshoot Arch
Build with Arch
---------------
For developers extending and customizing Arch for specialized needs:
.. grid:: 2
.. grid-item-card:: Agentic Workflow
:link: ../build_with_arch/agent.html
Discover how to create and manage custom agents within Arch
.. grid-item-card:: RAG Application
:link: ../build_with_arch/rag.html
Integrate RAG for knowledge-driven responses
View file
@ -0,0 +1,84 @@
.. _quickstart:
Quickstart
================
Follow this guide to learn how to quickly set up Arch and integrate it into your generative AI applications.
Prerequisites
----------------------------
Before you begin, ensure you have the following:
.. vale Vale.Spelling = NO
- ``Docker`` & ``Python`` installed on your system
- ``API Keys`` for LLM providers (if using external LLMs)
The fastest way to get started using Arch is to use `katanemo/arch <https://hub.docker.com/r/katanemo/arch>`_ pre-built binaries.
You can also build it from source.
Step 1: Install Arch
----------------------------
Arch's CLI allows you to manage and interact with the Arch gateway efficiently. To install the CLI, simply
run the following command:
.. code-block:: console
$ pip install archgw
This will install the archgw command-line tool globally on your system.
.. tip::
We recommend that developers create a new Python virtual environment to isolate dependencies before installing Arch.
This ensures that `archgw` and its dependencies do not interfere with other packages on your system.
To create and activate a virtual environment, you can run the following commands:
.. code-block:: console
$ python -m venv venv
$ source venv/bin/activate # On Windows, use: venv\Scripts\activate
$ pip install archgw
Step 2: Configure Arch
----------------------
Arch operates based on a configuration file where you can define LLM providers, prompt targets, guardrails, and more.
Below is an example configuration to get you started, including:
.. vale Vale.Spelling = NO
- ``listen``: Specifies where Arch listens for incoming prompts.
- ``system_prompt``: Defines a predefined prompt to set the context for interactions.
- ``llm_providers``: Lists the LLM providers Arch can route prompts to.
- ``prompt_targets``: Defines endpoints that handle specific types of prompts.
- ``endpoints``: Specifies the application server endpoints that prompt targets forward requests to.
.. literalinclude:: includes/quickstart.yaml
:language: yaml
Step 3: Start Arch Gateway
--------------------------
.. code-block:: console
$ archgw up [path_to_config]
Next Steps
-------------------
Congratulations! You've successfully set up Arch. To further enhance your GenAI applications, explore the following resources:
- Full Documentation: Comprehensive guides and references.
- `GitHub Repository <https://github.com/katanemo/arch>`_: Access the source code, contribute, and track updates.
- `Support <https://github.com/katanemo/arch#contact>`_: Get help and connect with the Arch community.
With Arch, building scalable, fast, and personalized GenAI applications has never been easier. Dive deeper into Arch's capabilities and start creating innovative AI-driven experiences today!
View file
@ -1,48 +0,0 @@
.. _getting_started:
Getting Started
================
.. sidebar:: Pre-requisites
In order for you to get started, please make sure that `Docker <https://www.docker.com/get-started>`_
and `Python <https://www.python.org/downloads/>`_ are installed locally.
As the examples use the pre-built `Arch Docker images <https://hub.docker.com/r/katanemo/arch>`_,
they should work on the following architectures:
- x86_64
- ARM 64
This section gets you started with a very simple configuration and provides some example configurations.
The fastest way to get started using Arch is installing `pre-built binaries <https://hub.docker.com/r/katanemo/arch>`_.
You can also build it from source.
Step 1: Install the Arch CLI
----------------------------
Arch's CLI allows you to manage and interact with the Arch gateway efficiently. To install the CLI, simply
run the following command:
.. code-block:: bash
pip install archgw
This will install the archgw command-line tool globally on your system.
Step 2: Start Arch Gateway
--------------------------
.. code-block:: bash
archgw up --quick-start
Configuration
-------------
Today, only support a static bootstrap configuration file for simplicity today:
.. literalinclude:: /_config/getting-started.yml
:language: yaml
View file
@ -1,6 +0,0 @@
.. toctree::
:maxdepth: 2
:caption: Use Cases
use_cases/rag
use_cases/function_calling
View file
@ -0,0 +1,226 @@
.. _function_calling:
Function Calling
================
**Function Calling** is a powerful feature in Arch that allows your application to dynamically execute backend functions or services based on user prompts.
This enables seamless integration between natural language interactions and backend operations, turning user inputs into actionable results.
What is Function Calling?
-------------------------
Function Calling refers to the mechanism where the user's prompt is parsed, relevant parameters are extracted, and a designated backend function (or API) is triggered to execute a particular task.
This feature bridges the gap between generative AI systems and functional business logic, allowing users to interact with the system through natural language while the backend performs the necessary operations.
Function Calling Workflow
-------------------------
#. **Prompt Parsing**
When a user submits a prompt, Arch analyzes it to determine the intent. Based on this intent, the system identifies whether a function needs to be invoked and which parameters should be extracted.
#. **Parameter Extraction**
Archs advanced natural language processing capabilities automatically extract parameters from the prompt that are necessary for executing the function. These parameters can include text, numbers, dates, locations, or other relevant data points.
#. **Function Invocation**
Once the necessary parameters have been extracted, Arch invokes the relevant backend function. This function could be an API, a database query, or any other form of backend logic. The function is executed with the extracted parameters to produce the desired output.
#. **Response Handling**
After the function has been called and executed, the result is processed and a response is generated. This response is typically delivered in a user-friendly format, which can include a text explanation, a data summary, or a confirmation message for critical actions.
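The four steps above can be sketched end-to-end in a few lines. This is a toy illustration of
the flow only; the naive intent matcher and regex-based parameter extraction below are
stand-ins for what Arch-Function actually does:

.. code-block:: python

   import re

   def parse_intent(prompt):
       # 1. Prompt parsing: decide which function, if any, the prompt targets.
       return "get_weather" if "weather" in prompt.lower() else None

   def extract_params(prompt):
       # 2. Parameter extraction: pull the required arguments out of the prompt.
       match = re.search(r"weather in ([A-Za-z ,]+)", prompt, re.IGNORECASE)
       return {"location": match.group(1).strip()} if match else {}

   def get_weather(location, unit="fahrenheit"):
       # 3. Function invocation: the backend logic mapped to the prompt target.
       return {"location": location, "unit": unit, "temperature": 64}

   def handle(prompt):
       # 4. Response handling: turn the raw result into a user-friendly reply.
       if parse_intent(prompt) != "get_weather":
           return "Sorry, I can only help with weather right now."
       result = get_weather(**extract_params(prompt))
       return f"It's {result['temperature']}°F in {result['location']}."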
Arch-Function
-------------------------
The `Arch-Function <https://huggingface.co/collections/katanemolabs/arch-function-66f209a693ea8df14317ad68>`_ collection is a set of state-of-the-art (SOTA) large language models (LLMs) specifically designed for **function calling** tasks.
The models are designed to understand complex function signatures, identify required parameters, and produce accurate function call outputs based on natural language prompts.
Achieving performance on par with GPT-4, these models set a new benchmark in the domain of function-oriented tasks, making them suitable for scenarios where automated API interaction and function execution are crucial.
In summary, the Arch-Function collection demonstrates:
- **State-of-the-art performance** in function calling
- **Accurate parameter identification and suggestion**, even in ambiguous or incomplete inputs
- **High generalization** across multiple function calling use cases, from API interactions to automated backend tasks.
- Optimized **low-latency, high-throughput performance**, making it suitable for real-time, production environments.
Key Features
~~~~~~~~~~~~
.. table::
:width: 100%
========================= ===============================================================
**Functionality** **Definition**
========================= ===============================================================
Single Function Calling Call only one function per user prompt
Parallel Function Calling Call the same function multiple times with different parameter values
Multiple Function Calling Call different functions per user prompt
Parallel & Multiple Perform both parallel and multiple function calling
========================= ===============================================================
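To make the table concrete, a single user prompt can fan out into several calls at once. The
structure below is illustrative output only (not Arch's exact schema) for a prompt that
triggers both parallel and multiple function calling:

.. code-block:: python

   # "Reboot devices A and B, and what's the weather in Seattle?"
   tool_calls = [
       {"name": "reboot_device", "arguments": {"device_id": "A"}},
       {"name": "reboot_device", "arguments": {"device_id": "B"}},
       {"name": "get_weather", "arguments": {"location": "Seattle, WA"}},
   ]

   # Parallel: the same function is invoked more than once with different arguments.
   is_parallel = sum(c["name"] == "reboot_device" for c in tool_calls) > 1

   # Multiple: more than one distinct function is invoked for the same prompt.
   is_multiple = len({c["name"] for c in tool_calls}) > 1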
Supported Languages
~~~~~~~~~~~~~~~~~~~
.. table::
:width: 100%
========================= ===========================================================================================================================================
**Language** **Data Type**
========================= ===========================================================================================================================================
Python ``int``, ``str``, ``float``, ``bool``, ``list``, ``set``, ``dict``, ``tuple``
Java ``byte``, ``short``, ``int``, ``long``, ``float``, ``double``, ``boolean``, ``char``, ``Array``, ``ArrayList``, ``Set``, ``HashMap``, ``Hashtable``, ``Queue``, ``Stack``
Javascript ``Number``, ``Bigint``, ``String``, ``Boolean``, ``Object``, ``Array``, ``Date``
========================= ===========================================================================================================================================
Implementing Function Calling
-----------------------------
Here's a step-by-step guide to configuring function calling within your Arch setup:
Step 1: Define the Function
~~~~~~~~~~~~~~~~~~~~~~~~~~~
Create or identify the backend function you want Arch to call. This could be an API endpoint, a script, or any other executable backend logic.
.. code-block:: python
:caption: Example Function
import requests
def get_weather(location: str, unit: str = "fahrenheit"):
if unit not in ["celsius", "fahrenheit"]:
raise ValueError("Invalid unit. Choose either 'celsius' or 'fahrenheit'.")
api_server = "https://api.yourweatherapp.com"
endpoint = f"{api_server}/weather"
params = {
"location": location,
"unit": unit
}
response = requests.get(endpoint, params=params)
return response.json()
# Example usage
weather_info = get_weather("Seattle, WA", "celsius")
print(weather_info)
Step 2: Configure Prompt Targets
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
Map the function to a prompt target, defining the intent and parameters that Arch will extract from the user's prompt.
.. code-block:: yaml
:caption: Example Config
prompt_targets:
- name: get_weather
description: Get the current weather for a location
parameters:
- name: location
description: The city and state, e.g. San Francisco, New York
type: str
required: true
- name: unit
description: The unit of temperature to return
type: str
enum: ["celsius", "fahrenheit"]
endpoint:
name: api_server
path: /weather
Step 3: Validate Parameters
~~~~~~~~~~~~~~~~~~~~~~~~~~~
Arch validates that the required parameters (e.g., ``location``) are present in the prompt; you can also add validation rules of your own if necessary.
Here is an example validation schema using the `jsonschema <https://json-schema.org/docs>`_ library:
.. code-block:: python
:caption: Example Validation Schema
import requests
from jsonschema import validate, ValidationError
# Define the JSON Schema for parameter validation
weather_validation_schema = {
"type": "object",
"properties": {
"location": {
"type": "string",
"minLength": 1,
"description": "The city and state, e.g. 'San Francisco, New York'"
},
"unit": {
"type": "string",
"enum": ["celsius", "fahrenheit"],
"description": "The unit of temperature to return"
}
},
"required": ["location"],
"additionalProperties": False
}
def get_weather(location: str, unit: str = "fahrenheit"):
# Create the data object for validation
params = {
"location": location,
"unit": unit
}
# Validate parameters using JSON Schema
try:
validate(instance=params, schema=weather_validation_schema)
except ValidationError as e:
raise ValueError(f"Invalid input: {e.message}")
# Prepare the API request
api_server = "https://api.yourweatherapp.com"
endpoint = f"{api_server}/weather"
# Make the API request
response = requests.get(endpoint, params=params)
return response.json()
# Example usage
weather_info = get_weather("Seattle, WA", "celsius")
print(weather_info)
Step 4: Execute and Return the Response
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
Once the function is called, format the response and send it back to Arch-Function.
Arch-Function then turns the raw result into a coherent, user-friendly response.
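As a sketch of this step, the weather function above could be served to Arch over the ``/weather`` path configured in the prompt target. The route handler below uses Flask and returns mock data so the example stays runnable; the payload shape is an assumption, not part of the Arch spec:

```python
from flask import Flask, request, jsonify

app = Flask(__name__)

@app.route("/weather", methods=["GET"])
def weather():
    # Parameters Arch extracted from the prompt arrive as query args
    location = request.args.get("location")
    unit = request.args.get("unit", "fahrenheit")
    if not location:
        return jsonify({"error": "'location' is required"}), 400
    # A real implementation would call get_weather(); mock data keeps the sketch runnable
    return jsonify({"location": location, "unit": unit, "temperature": 18})
```

Arch receives this JSON response and hands it to Arch-Function for summarization back to the user.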
Example Use Cases
-----------------
Here are some common use cases where Function Calling can be highly beneficial:
- **Data Retrieval**: Extracting information from databases or APIs based on user inputs (e.g., checking account balances, retrieving order status).
- **Transactional Operations**: Executing business logic such as placing an order, processing payments, or updating user profiles.
- **Information Aggregation**: Fetching and combining data from multiple sources (e.g., displaying travel itineraries or combining analytics from various dashboards).
- **Task Automation**: Automating routine tasks like setting reminders, scheduling meetings, or sending emails.
- **User Personalization**: Tailoring responses based on user history, preferences, or ongoing interactions.
Best Practices and Tips
-----------------------
When integrating function calling into your generative AI applications, keep these tips in mind to get the most out of our Arch-Function models:
- **Keep it clear and simple**: Your function names and parameters should be straightforward and easy to understand. Think of it like explaining a task to a smart colleague - the clearer you are, the better the results.
- **Context is king**: Don't skimp on the descriptions for your functions and parameters. The more context you provide, the better the LLM can understand when and how to use each function.
- **Be specific with your parameters**: Instead of using generic types, get specific. If you're asking for a date, say it's a date. If you need a number between 1 and 10, spell that out. The more precise you are, the more accurate the LLM's responses will be.
- **Expect the unexpected**: Test your functions thoroughly, including edge cases. LLMs can be creative in their interpretations, so it's crucial to ensure your setup is robust and can handle unexpected inputs.
- **Watch and learn**: Pay attention to how the LLM uses your functions. Which ones does it call often? In what contexts? This information can help you optimize your setup over time.
Remember, working with LLMs is part science, part art. Don't be afraid to experiment and iterate to find what works best for your specific use case.

@ -7,8 +7,8 @@ Access logging in Arch refers to the logging of detailed information about each
It provides visibility into the traffic passing through Arch, which is crucial for monitoring, debugging, and analyzing the
behavior of AI applications and their interactions.
Key Features of Access Logging in Arch:
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
Key Features
^^^^^^^^^^^^
* **Per-Request Logging**:
Each request that passes through Arch is logged. This includes important metadata such as HTTP method,
path, response status code, request duration, upstream host, and more.
@ -16,8 +16,8 @@ Key Features of Access Logging in Arch:
Access logs can be exported to centralized logging systems (e.g., ELK stack or Fluentd) or used to feed monitoring and alerting systems.
* **Structured Logging**: each request is logged as a structured object, making it easier to parse and analyze using tools like Elasticsearch and Kibana.
.. code-block:: yaml
.. code-block:: console
[2024-09-27T14:52:01.123Z] "ARCH REQUEST" GET /path/to/resource HTTP/1.1 200 512 1024 56 upstream_service.com D
$ [2024-09-27T14:52:01.123Z] "ARCH REQUEST" GET /path/to/resource HTTP/1.1 200 512 1024 56 upstream_service.com D
X-Arch-Upstream-Service-Time: 25
X-Arch-Attempt-Count: 1

@ -7,5 +7,5 @@ Observability
:maxdepth: 2
tracing
stats
access_logs
monitoring
access_logging

@ -22,9 +22,9 @@ and understanding complex AI agent architectures like Co-pilots.
flow, enabling **end-to-end tracing** across the entire application. By using OpenTelemetry, Arch ensures
that developers can capture this trace data consistently and in a format compatible with various observability
tools.
______________________________________________________________________________________________
Benefits of using ``traceparent`` headers
Benefits of Using ``Traceparent`` Headers
-----------------------------------------
- **Standardization**: The W3C Trace Context standard ensures compatibility across ecosystem tools, allowing
@ -34,7 +34,7 @@ Benefits of using ``traceparent`` headers
- **Interoperability**: Works seamlessly with popular tracing tools like AWS X-Ray, Datadog, Jaeger, and many others,
making it easy to visualize traces in the tools you're already using.
How to initiate a trace
How to Initiate A Trace
-----------------------
1. **Enable Tracing Configuration**: Simply add the ``tracing: 100`` flag in the :ref:`listener <arch_overview_listeners>` config
@ -64,10 +64,10 @@ The ``traceparent`` header has the following format::
traceparent: {version}-{trace-id}-{parent-id}-{trace-flags}
- {version}: The version of the Trace Context specification (e.g., ``00``).
- {trace-id}: A 16-byte (32-character hexadecimal) unique identifier for the trace.
- {parent-id}: An 8-byte (16-character hexadecimal) identifier for the parent span.
- {trace-flags}: Flags indicating trace options (e.g., sampling).
- ``{version}``: The version of the Trace Context specification (e.g., ``00``).
- ``{trace-id}``: A 16-byte (32-character hexadecimal) unique identifier for the trace.
- ``{parent-id}``: An 8-byte (16-character hexadecimal) identifier for the parent span.
- ``{trace-flags}``: Flags indicating trace options (e.g., sampling).
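To make the format concrete, here is a minimal sketch that validates and unpacks a ``traceparent`` header (a hypothetical helper, not part of Arch):

```python
import re

# W3C Trace Context: version-traceid-parentid-flags, all lowercase hex
TRACEPARENT_RE = re.compile(
    r"^(?P<version>[0-9a-f]{2})-"
    r"(?P<trace_id>[0-9a-f]{32})-"
    r"(?P<parent_id>[0-9a-f]{16})-"
    r"(?P<flags>[0-9a-f]{2})$"
)

def parse_traceparent(header: str) -> dict:
    match = TRACEPARENT_RE.match(header)
    if not match:
        raise ValueError(f"Malformed traceparent header: {header!r}")
    fields = match.groupdict()
    # Bit 0 of trace-flags indicates whether the trace is sampled
    fields["sampled"] = bool(int(fields["flags"], 16) & 0x01)
    return fields
```

For example, ``parse_traceparent("00-4bf92f3577b34da6a3ce929d0e0e4736-00f067aa0ba902b7-01")`` yields a sampled trace with version ``00``.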
Instrumentation
~~~~~~~~~~~~~~~
@ -86,10 +86,10 @@ Example with OpenTelemetry in Python
Install OpenTelemetry packages:
.. code-block:: bash
.. code-block:: console
pip install opentelemetry-api opentelemetry-sdk opentelemetry-exporter-otlp
pip install opentelemetry-instrumentation-requests
$ pip install opentelemetry-api opentelemetry-sdk opentelemetry-exporter-otlp
$ pip install opentelemetry-instrumentation-requests
Set up the tracer and exporter:
@ -228,7 +228,7 @@ To send tracing data to `AWS X-Ray <https://aws.amazon.com/xray/>`_ :
exporters:
awsxray:
region: your-aws-region
region: <Your-Aws-Region>
service:
pipelines:
@ -265,7 +265,7 @@ To send tracing data to `Datadog <https://docs.datadoghq.com/getting_started/tra
exporters:
datadog:
api:
key: "${DD_API_KEY}"
key: "${<Your-Datadog-Api-Key>}"
site: "${DD_SITE}"
service:
@ -277,10 +277,10 @@ To send tracing data to `Datadog <https://docs.datadoghq.com/getting_started/tra
2. **Set Environment Variables**: Provide your Datadog API key and site.
.. code-block:: bash
.. code-block:: console
export DD_API_KEY=your_datadog_api_key
export DD_SITE=datadoghq.com # Or datadoghq.eu
$ export DD_API_KEY=<Your-Datadog-Api-Key>
$ export DD_SITE=datadoghq.com # Or datadoghq.eu
3. **Deploy the Collector**: Run the collector in your environment.
4. **Verify Traces**: Access the Datadog APM dashboard to view your traces.
@ -294,7 +294,7 @@ Best Practices
- **Performance Monitoring**: Be mindful of the performance impact and adjust sampling rates accordingly.
- **Error Handling**: Implement proper error handling to prevent tracing issues from affecting your application.
Conclusion
Summary
----------
By leveraging the ``traceparent`` header for trace context propagation, Arch enables developers to implement
@ -304,10 +304,10 @@ tools like AWS X-Ray and Datadog, enhancing observability and facilitating faste
Additional Resources
--------------------
- **OpenTelemetry Documentation**: https://opentelemetry.io/docs/
- **W3C Trace Context Specification**: https://www.w3.org/TR/trace-context/
- **AWS X-Ray Exporter**: https://github.com/open-telemetry/opentelemetry-collector-contrib/tree/main/exporter/awsxrayexporter
- **Datadog Exporter**: https://github.com/open-telemetry/opentelemetry-collector-contrib/tree/main/exporter/datadogexporter
- `OpenTelemetry Documentation <https://opentelemetry.io/docs/>`_
- `W3C Trace Context Specification <https://www.w3.org/TR/trace-context/>`_
- `AWS X-Ray Exporter <https://github.com/open-telemetry/opentelemetry-collector-contrib/tree/main/exporter/awsxrayexporter>`_
- `Datadog Exporter <https://github.com/open-telemetry/opentelemetry-collector-contrib/tree/main/exporter/datadogexporter>`_
.. Note::
Replace placeholders like ``your-aws-region``, and ``DD_API_KEY`` with your actual configurations.
Replace placeholders such as ``<Your-Aws-Region>`` and ``<Your-Datadog-Api-Key>`` with your actual configurations.

@ -0,0 +1,90 @@
.. _prompt_guard:
Prompt Guard
================
**Prompt guard** is a security and validation layer designed to protect prompt-based systems by filtering and analyzing inputs before they reach the core processing stages.
In applications where prompts generate responses or execute specific actions based on user inputs, prompt guard minimizes risks like malicious inputs, unexpected errors, or misaligned outputs.
By adding a layer of input scrutiny, prompt guard ensures safer, more reliable, and accurate interactions in prompt-driven environments.
Why Prompt Guard
----------------
.. vale Vale.Spelling = NO
- **Input Validation**
- **Type Enforcement**: Ensures that inputs are of the expected data types, such as integers, strings, lists, or specific formats, reducing errors from unexpected data.
- **Value Constraints**: Restricts inputs to valid ranges, lengths, or patterns to avoid unusual or incorrect responses.
- **Prompt Sanitization**
- **Injection Prevention**: Detects and filters inputs that might attempt injection attacks, like adding code or SQL queries in a prompt-based application.
- **Content Filtering**: Identifies and removes potentially harmful, sensitive, or inappropriate content from inputs to maintain safe interactions.
- **Intent Detection**
- **Behavioral Analysis**: Analyzes prompt intent to detect if the input aligns with the function's intended use. This can help prevent unwanted behavior, such as attempts to bypass limitations or misuse system functions.
- **Sentiment and Tone Checking**: Examines the tone of prompts to ensure they align with application guidelines, useful in conversational systems and customer support interactions.
- **Dynamic Error Handling**
- **Automatic Correction**: Applies error-handling techniques to suggest corrections for minor input errors, such as typos or misformatted data.
- **Feedback Mechanism**: Provides informative error messages to users, helping them understand how to correct input mistakes or adhere to guidelines.
- **Policy Enforcement**
- **Role-Based Filtering**: Customizes input validation based on user roles or permissions, allowing more flexibility or stricter enforcement depending on user access.
- **Compliance Checks**: Ensures inputs meet compliance or regulatory standards, especially in fields like finance or healthcare, where prompt outputs must align with strict guidelines.
Arch-Guard
----------
In the evolving landscape of LLM-powered applications, safeguarding against prompt attacks is crucial.
These attacks involve malicious prompts crafted to manipulate the intended behavior of the model, potentially leading to undesirable outcomes.
Arch-Guard is designed to address this challenge.
What Is Arch-Guard
~~~~~~~~~~~~~~~~~~
`Arch-Guard <https://huggingface.co/collections/katanemolabs/arch-guard-6702bdc08b889e4bce8f446d>`_ is a robust classifier model specifically trained on a diverse corpus of prompt attacks.
It excels at detecting explicitly malicious prompts and assessing toxic content, providing an essential layer of security for LLM applications.
By embedding Arch-Guard within the Arch architecture, we empower developers to build robust, LLM-powered applications while prioritizing security and safety. With Arch-Guard, you can navigate the complexities of prompt management with confidence, knowing you have a reliable defense against malicious input.
How Arch-Guard Works
----------------------
#. **Pre-Processing Stage**
As a request or prompt is received, Prompt Guard first performs validation, applying any type, format, or constraint checks. If any violations are detected, the input is flagged, and a tailored error message may be returned.
#. **Sanitization Stage**
The prompt is analyzed for potentially harmful or inappropriate content, and necessary filters are applied to clean the input.
#. **Behavior Analysis**
Next, the system assesses the intent and context of the prompt, verifying that it aligns with predefined function requirements. If the prompt raises any red flags, it can be modified or flagged for review.
#. **Error Handling and Feedback**
If the prompt contains errors or does not meet certain criteria, the user receives immediate feedback or correction suggestions, enhancing usability and reducing the chance of repeated input mistakes.
#. **Output Control**
After input validation and filtering, the prompt is allowed to proceed to the main processing phase. The output can also undergo a final check to ensure compliance with content guidelines or role-based policies.
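The stages above can be sketched as a simple pipeline. The length limit, blocklist patterns, and return shape below are illustrative placeholders, not the actual Arch-Guard implementation (which uses a trained classifier rather than regexes):

```python
import re

# Illustrative patterns only; Arch-Guard uses a classifier model, not a blocklist
BLOCKLIST = [re.compile(p, re.IGNORECASE) for p in [
    r"ignore (all )?previous instructions",
    r"<script\b",
]]

def guard_prompt(prompt, max_len: int = 2000):
    """Return (allowed, reason) for a user prompt."""
    # 1. Pre-processing: type and constraint checks
    if not isinstance(prompt, str) or not prompt.strip():
        return False, "empty or non-string prompt"
    if len(prompt) > max_len:
        return False, f"prompt exceeds {max_len} characters"
    # 2. Sanitization: filter known-bad patterns
    for pattern in BLOCKLIST:
        if pattern.search(prompt):
            return False, f"blocked pattern: {pattern.pattern}"
    # 3. Behavior analysis would invoke a classifier such as Arch-Guard here
    return True, "ok"
```

A benign prompt passes all stages, while an empty, oversized, or injection-style prompt is rejected with a reason that can drive the error-handling and feedback stage.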
Benefits of Using Prompt Guard
------------------------------
- **Enhanced Security**: Protects against injection attacks, harmful content, and misuse, securing both system and user data.
- **Increased Accuracy**: Filters out inappropriate or misaligned inputs, leading to more accurate and intended outputs.
- **Better User Experience**: Clear feedback and error correction improve user interactions by guiding them to correct input formats and constraints.
- **Regulatory Compliance**: Ensures that prompts adhere to necessary guidelines, especially for sensitive fields, minimizing the risk of regulatory breaches.
Summary
-------
Prompt guard is an essential tool for any prompt-based system that values security, accuracy, and compliance.
By implementing Prompt Guard, developers can provide a robust layer of input validation and security, leading to better-performing, reliable, and safer applications.

docs/source/index.rst Normal file
@ -0,0 +1,67 @@
Welcome to Arch!
================
.. image:: /_static/img/arch-logo.png
:width: 80%
:align: center
.. raw:: html
<div style="text-align: center; font-size: 1.25rem;">
<br>
<p>Build <strong>fast</strong>, <strong>robust</strong>, and <strong>personalized</strong> GenAI apps</p>
</div>
Arch (built by the contributors of `Envoy <https://www.envoyproxy.io/>`_ ) was born out of the belief that:
*Prompts are nuanced and opaque user requests, which require the same capabilities as traditional HTTP requests including secure handling, intelligent routing, robust observability, and integration with backend (API) systems for personalization - all outside business logic.*
.. tab-set::
.. tab-item:: Get Started
.. toctree::
:caption: Get Started
:titlesonly:
get_started/overview
get_started/intro_to_arch
get_started/quickstart
.. tab-item:: Concepts
.. toctree::
:caption: Concepts
:titlesonly:
concepts/tech_overview/tech_overview
concepts/llm_provider
concepts/prompt_target
.. tab-item:: Guides
.. toctree::
:caption: Guides
:titlesonly:
guides/prompt_guard
guides/function_calling
guides/observability/observability
.. tab-item:: Build with Arch
.. toctree::
:caption: Build with Arch
:titlesonly:
build_with_arch/agent
build_with_arch/rag
.. tab-item:: Resources
.. toctree::
:caption: Resources
:titlesonly:
resources/configuration_reference
resources/error_target

@ -1,12 +0,0 @@
Technical Architecture
======================
.. toctree::
:maxdepth: 2
intro/terminology
intro/threading_model
listeners/listeners
prompt_processing/prompt_processing
listeners/llm_provider
model_serving/model_serving

@ -1,15 +0,0 @@
.. _getting_help:
Getting help
============
We are very interested in building a community around Arch. Please reach out to us if you are
interested in using it and need help or want to contribute.
Please see `contact info <https://github.com/katanemo/arch#contact>`_.
Reporting security vulnerabilities
----------------------------------
Please see `security contact info
<https://github.com/katanemo/arch#reporting-security-vulnerabilities>`_.

@ -1,12 +0,0 @@
.. _intro:
Introduction
============
.. toctree::
:maxdepth: 2
what_is_arch
architecture/architecture
life_of_a_request
getting_help

@ -1,159 +0,0 @@
.. _llms_in_arch:
LLMs
====
Arch utilizes purpose-built, industry-leading LLMs to handle the crufty and undifferentiated work around
accepting, handling, and processing prompts. The following sections discuss some of the core models that
are built into Arch.
Arch-Guard-v1
-------------
LLM-powered applications are susceptible to prompt attacks, which are prompts intentionally designed to
subvert the developer's intended behavior of the LLM. Arch-Guard-v1 is a classifier model trained on a large
corpus of attacks, capable of detecting explicitly malicious prompts (and toxicity).
The model is useful as a starting point for identifying and guardrailing against the most risky realistic
inputs to LLM-powered applications. Our goal in embedding Arch-Guard in the Arch gateway is to enable developers
to focus on their business logic and factor security and safety out of application logic. With Arch-Guard-v1,
developers can significantly reduce prompt attack risk while maintaining control over the user experience.
Below are the test results comparing the strength of our model to Prompt-Guard from `Meta Llama <https://huggingface.co/meta-llama/Prompt-Guard-86M>`_.
.. list-table::
:header-rows: 1
:widths: 15 15 10 15 15
* - Dataset
- Jailbreak (Yes/No)
- Samples
- Prompt-Guard Accuracy
- Arch-Guard Accuracy
* - casual_conversation
- 0
- 3725
- 1.00
- 1.00
* - commonqa
- 0
- 9741
- 1.00
- 1.00
* - financeqa
- 0
- 1585
- 1.00
- 1.00
* - instruction
- 0
- 5000
- 1.00
- 1.00
* - jailbreak_behavior_benign
- 0
- 100
- 0.10
- 0.20
* - jailbreak_behavior_harmful
- 1
- 100
- 0.30
- 0.52
* - jailbreak_judge
- 1
- 300
- 0.33
- 0.49
* - jailbreak_prompts
- 1
- 79
- 0.99
- 1.00
* - jailbreak_tweet
- 1
- 1282
- 0.16
- 0.35
* - jailbreak_v
- 1
- 20000
- 0.90
- 0.93
* - jailbreak_vigil
- 1
- 104
- 1.00
- 1.00
* - mental_health
- 0
- 3512
- 1.00
- 1.00
* - telecom
- 0
- 4000
- 1.00
- 1.00
* - truthqa
- 0
- 817
- 1.00
- 0.98
* - weather
- 0
- 3121
- 1.00
- 1.00
.. list-table::
:header-rows: 1
:widths: 15 20
* - Statistics
- Overall performance
* - Overall Accuracy
- 0.93568 (Prompt-Guard), 0.95267 (Arch-Guard)
* - True positives rate (TPR)
- 0.8468 (Prompt-Guard), 0.8887 (Arch-Guard)
* - True negative rate (TNR)
- 0.9972 (Prompt-Guard), 0.9970 (Arch-Guard)
* - False positive rate (FPR)
- 0.0028 (Prompt-Guard), 0.0030 (Arch-Guard)
* - False negative rate (FNR)
- 0.1532 (Prompt-Guard), 0.1113 (Arch-Guard)
.. list-table::
:header-rows: 1
:widths: 15 20
* - Metrics
- Values
* - AUC
- 0.857 (Prompt-Guard), 0.880 (Arch-Guard)
* - Precision
- 0.715 (Prompt-Guard), 0.761 (Arch-Guard)
* - Recall
- 0.999 (Prompt-Guard), 0.999 (Arch-Guard)
Arch-FC
-------
Arch-FC is a lean, powerful and cost-effective agentic model designed for function calling scenarios.
You can run Arch-FC locally, or use the cloud-hosted version for as little as $0.05/M token (100x cheaper
than GPT-4o), with a p50 latency of 200ms (5x faster than GPT-4o), while meeting frontier model performance.
.. Note::
Function calling helps you personalize the GenAI experience by calling application-specific operations via
prompts. This involves any predefined functions or APIs you want to expose to perform tasks, gather
information, or manipulate data - via prompts.
You can get started with function calling simply by configuring a prompt target with a name, description
and set of parameters needed by a specific backend function or a hosted API. The name, and description helps
Arch-FC match a user prompt to a function or API that can process it.
By using Arch-FC, Arch enables you to easily build agentic workflows tailored to domain-specific use cases -
from updating insurance claims to creating ad campaigns. Arch-FC analyzes prompts, extracts critical information
from them, engages in lightweight conversations with the user to gather any missing parameters, and then
hands control back to Arch to make the API call to your hosted backend. Arch-FC handles the muck of information
extraction so that you can focus on the business logic of your application.

@ -7,8 +7,8 @@ e.g. supporting push observability stats, managing prompt-endpoints as virtual
options, etc.). Our belief is that simple things should be simple, so we offer good defaults for developers, so
that they can spend more of their time building features unique to their AI experience.
.. literalinclude:: /_config/prompt-config-full-reference.yml
.. literalinclude:: includes/arch_config_full_reference.yaml
:language: yaml
:linenos:
:caption: :download:`prompt-config-full-reference-beta-1-0.yml </_config/prompt-config-full-reference.yml>`
:caption: :download:`Arch Configuration - Full Reference <includes/arch_config_full_reference.yaml>`

@ -0,0 +1,58 @@
.. _error_target:
Error Targets
=============
**Error targets** are designed to capture and manage specific issues or exceptions that occur during Arch's function or system's execution.
These endpoints receive errors forwarded from Arch when issues arise, such as improper function/API calls, guardrail violations, or other processing errors.
The errors are communicated to the application via headers like ``X-Arch-[ERROR-TYPE]``, enabling you to respond appropriately and handle errors gracefully.
Key Concepts
------------
**Error Type**: Categorizes the nature of the error, such as "ValidationError" or "RuntimeError." These error types help in identifying what
kind of issue occurred and provide context for troubleshooting.
**Error Message**: A clear, human-readable message describing the error. This should provide enough detail to inform users or developers of
the root cause or required action.
**Target Prompt**: The specific prompt or operation where the error occurred. Understanding where the error happened helps with debugging
and pinpointing the source of the problem.
**Parameter-Specific Errors**: Errors that arise due to invalid or missing parameters when invoking a function. These errors are critical
for ensuring the correctness of inputs.
Error Header Example
--------------------
.. code-block:: http
HTTP/1.1 400 Bad Request
X-Arch-Error-Type: FunctionValidationError
X-Arch-Error-Message: Tools call parsing failure
X-Arch-Target-Prompt: createUser
Content-Type: application/json
"messages": [
{
"role": "user",
"content": "Please create a user with the following ID: 1234"
},
{
"role": "system",
"content": "Expected a string for 'user_id', but got an integer."
}]
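On the client side, the ``X-Arch-*`` headers shown above can drive error handling. The sketch below maps error headers to user-facing messages; the dispatch table and the ``GuardrailViolation`` type are application-level assumptions, not an Arch requirement:

```python
def handle_arch_error(status: int, headers: dict) -> str:
    """Map Arch error headers to a user-facing message."""
    if status < 400:
        return "ok"
    error_type = headers.get("X-Arch-Error-Type", "UnknownError")
    message = headers.get("X-Arch-Error-Message", "An unexpected error occurred")
    target = headers.get("X-Arch-Target-Prompt")
    # Show users a friendly message; keep the raw details for server-side logs
    friendly = {
        "FunctionValidationError": "We couldn't process your request parameters.",
        "GuardrailViolation": "This request was blocked by a safety policy.",
    }.get(error_type, "Something went wrong, please try again.")
    detail = f" (target: {target})" if target else ""
    print(f"arch error: {error_type}: {message}{detail}")  # replace with real logging
    return friendly
```

This keeps raw error types and stack traces out of user-visible output while preserving them for debugging, in line with the best practices below.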
Best Practices and Tips
-----------------------
- **Graceful Degradation**: If an error occurs, fail gracefully by providing fallback logic or alternative flows when possible.
- **Log Errors**: Always log errors on the server side for later analysis.
- **Client-Side Handling**: Make sure the client can interpret error responses and provide meaningful feedback to the user. Clients should not display raw error codes or stack traces but rather handle them gracefully.

@ -1,22 +0,0 @@
Documentation
=============
.. image:: /_static/img/arch-logo.png
:width: 100%
:align: center
**Arch is built on (and by the core contributors of) Envoy proxy with the belief that:**
*Prompts are nuanced and opaque user requests, which require the same capabilities as traditional HTTP requests
including secure handling, intelligent routing, robust observability, and integration with backend (API)
systems for personalization - all outside business logic.*
.. toctree::
:maxdepth: 1
intro/intro
getting_started/getting_started
getting_started/use_cases
observability/observability
llms/llms
configuration_reference

@ -206,7 +206,7 @@ mod test {
#[test]
fn test_deserialize_configuration() {
let ref_config =
fs::read_to_string("../docs/source/_config/prompt-config-full-reference.yml")
fs::read_to_string("../docs/source/resources/includes/arch_config_full_reference.yaml")
.expect("reference config file not found");
let config: super::Configuration = serde_yaml::from_str(&ref_config).unwrap();