diff --git a/.buildinfo b/.buildinfo index c47cdef2..da2bae38 100755 --- a/.buildinfo +++ b/.buildinfo @@ -1,4 +1,4 @@ # Sphinx build info version 1 # This file records the configuration used when building these files. When it is not found, a full rebuild will be done. -config: fc2605010c07d06bc6b5b52087f9027c +config: 5f2cdb8c92eb360b7b409003059fad04 tags: 645f666f9bcd5a90fca523b33c5a78b7 diff --git a/_images/input-token-metrics.png b/_images/input-token-metrics.png new file mode 100755 index 00000000..a2dc0920 Binary files /dev/null and b/_images/input-token-metrics.png differ diff --git a/_images/llm-request-metrics.png b/_images/llm-request-metrics.png new file mode 100755 index 00000000..fad652be Binary files /dev/null and b/_images/llm-request-metrics.png differ diff --git a/_images/mutli-turn-example.png b/_images/mutli-turn-example.png new file mode 100755 index 00000000..cc7322cb Binary files /dev/null and b/_images/mutli-turn-example.png differ diff --git a/_images/output-token-metrics.png b/_images/output-token-metrics.png new file mode 100755 index 00000000..e5fbeb50 Binary files /dev/null and b/_images/output-token-metrics.png differ diff --git a/_static/documentation_options.js b/_static/documentation_options.js index 6d8f0151..da2fdbd1 100755 --- a/_static/documentation_options.js +++ b/_static/documentation_options.js @@ -1,5 +1,5 @@ const DOCUMENTATION_OPTIONS = { - VERSION: ' v0.1.5', + VERSION: ' v0.1.7', LANGUAGE: 'en', COLLAPSE_INDEX: false, BUILDER: 'html', diff --git a/_static/img/input-token-metrics.png b/_static/img/input-token-metrics.png new file mode 100755 index 00000000..a2dc0920 Binary files /dev/null and b/_static/img/input-token-metrics.png differ diff --git a/_static/img/llm-request-metrics.png b/_static/img/llm-request-metrics.png new file mode 100755 index 00000000..fad652be Binary files /dev/null and b/_static/img/llm-request-metrics.png differ diff --git a/_static/img/output-token-metrics.png 
b/_static/img/output-token-metrics.png new file mode 100755 index 00000000..e5fbeb50 Binary files /dev/null and b/_static/img/output-token-metrics.png differ diff --git a/build_with_arch/agent.html b/build_with_arch/agent.html index a8d9ed6b..94f8d059 100755 --- a/build_with_arch/agent.html +++ b/build_with_arch/agent.html @@ -7,9 +7,9 @@ -Agentic Workflow | Arch Docs v0.1.5 - - +Agentic Apps | Arch Docs v0.1.7 + + @@ -18,7 +18,7 @@ - + + diff --git a/build_with_arch/multi_turn.html b/build_with_arch/multi_turn.html new file mode 100755 index 00000000..751fab94 --- /dev/null +++ b/build_with_arch/multi_turn.html @@ -0,0 +1,360 @@ + + + + + + + + + +Multi-Turn | Arch Docs v0.1.7 + + + + + + + + + + + + + + + +
+ Skip to content +
+ +
+
+
+ +
+
+
+
+
+
+ +
+
+

Multi-Turn

+

Developers often struggle to efficiently handle +follow-up or clarification questions. Specifically, when users ask for changes or additions to previous responses, it requires developers to +re-write prompts using LLMs with precise prompt engineering techniques. This process is slow, manual, and error-prone, and it adds latency and token cost for +common scenarios that can be managed more efficiently.

+

Arch is highly capable of accurately detecting and processing prompts in multi-turn scenarios so that you can build fast and accurate agents in minutes. +Below are some conversational examples that you can build via Arch. Each example is enriched with annotations (via ** [Arch] ** ) that illustrate how Arch +processes conversational messages on your behalf.

+
+

Note

+

The following section assumes that you have some knowledge about the core concepts of Arch, such as prompt_targets. +If you haven’t familiarized yourself with Arch’s concepts, we recommend you first read the tech overview section. +Additionally, the conversation examples below assume the usage of the following arch_config.yaml file.

+
+
+

Example 1: Adjusting Retrieval

+
User: What are the benefits of renewable energy?
+**[Arch]**: Check if there is an available <prompt_target> that can handle this user query.
+**[Arch]**: Found "get_info_for_energy_source" prompt_target in arch_config.yaml. Forward prompt to the endpoint configured in "get_info_for_energy_source"
+...
+Assistant: Renewable energy reduces greenhouse gas emissions, lowers air pollution, and provides sustainable power sources like solar and wind.
+
+User: Include cost considerations in the response.
+**[Arch]**: Follow-up detected. Forward prompt history to the "get_info_for_energy_source" prompt_target and post the following parameters consideration="cost"
+...
+Assistant: Renewable energy reduces greenhouse gas emissions, lowers air pollution, and provides sustainable power sources like solar and wind. While the initial setup costs can be high, long-term savings from reduced fuel expenses and government incentives make it cost-effective.
+
+
+
+
+

Example 2: Switching Intent

+
User: What are the symptoms of diabetes?
+**[Arch]**: Check if there is an available <prompt_target> that can handle this user query.
+**[Arch]**: Found "diseases_symptoms" prompt_target in arch_config.yaml. Forward disease=diabetes to "diseases_symptoms" prompt target
+...
+Assistant: Common symptoms include frequent urination, excessive thirst, fatigue, and blurry vision.
+
+User: How is it diagnosed?
+**[Arch]**: New intent detected.
+**[Arch]**: Found "disease_diagnoses" prompt_target in arch_config.yaml. Forward disease=diabetes to "disease_diagnoses" prompt target
+...
+Assistant: Diabetes is diagnosed through blood tests like fasting blood sugar, A1C, or an oral glucose tolerance test.
+
+
+
+
+

Build Multi-Turn RAG Apps

+

The following section describes how you can easily add support for multi-turn scenarios via Arch. You process and manage multi-turn prompts +just like you manage single-turn ones. Arch handles the complexity of detecting the correct intent based on the last user prompt and +the conversational history, extracts relevant parameters needed by downstream APIs, and dispatches calls to any upstream LLMs to summarize the +response from your APIs.

+
+

Step 1: Define Arch Config

+
+
Arch Config
+
 1version: v0.1
+ 2listener:
+ 3  address: 127.0.0.1
+ 4  port: 8080 #If you configure port 443, you'll need to update the listener with tls_certificates
+ 5  message_format: huggingface
+ 6
+ 7# Centralized way to manage LLMs, keys, retry logic, failover and limits
+ 8llm_providers:
+ 9  - name: OpenAI
+10    provider: openai
+11    access_key: $OPENAI_API_KEY
+12    model: gpt-3.5-turbo
+13    default: true
+14
+15# default system prompt used by all prompt targets
+16system_prompt: |
+17   You are a helpful assistant and can offer information about energy sources. You will get a JSON object with energy_source and consideration fields. Focus on answering using those fields
+18
+19prompt_targets:
+20  - name: get_info_for_energy_source
+21    description: get information about an energy source
+22    parameters:
+23      - name: energy_source
+24        type: str
+25        description: a source of energy
+26        required: true
+27        enum: [renewable, fossil]
+28      - name: consideration
+29        type: str
+30        description: a specific type of consideration for an energy source
+31        enum: [cost, economic, technology]
+32    endpoint:
+33      name: rag_energy_source_agent
+34      path: /agent/energy_source_info
+35      http_method: POST
+
+
+
+
+
+

Step 2: Process Request in FastAPI

+

Once the prompt targets are configured as above, handle parameters across multi-turn requests as if it's a single-turn request

+
+
Parameter handling with FastAPI
+
 1import os
+ 2import gradio as gr
+ 3
+ 4from fastapi import FastAPI, HTTPException
+ 5from pydantic import BaseModel
+ 6from typing import Optional
+ 7from openai import OpenAI
+ 8from common import create_gradio_app
+ 9
+10app = FastAPI()
+11
+12
+13# Define the request model
+14class EnergySourceRequest(BaseModel):
+15    energy_source: str
+16    consideration: Optional[str] = None
+17
+18
+19class EnergySourceResponse(BaseModel):
+20    energy_source: str
+21    consideration: Optional[str] = None
+22
+23
+24# Post method for energy source info
+25@app.post("/agent/energy_source_info")
+26def get_workforce(request: EnergySourceRequest):
+27    """
+28    Endpoint to get details about energy source
+29    """
+30    considertion = "You don't have any specific consideration. Feel free to talk in a more open ended fashion"
+31
+32    if request.consideration is not None:
+33        considertion = f"Add specific focus on the following consideration when you summarize the content for the energy source: {request.consideration}"
+34
+35    response = {
+36        "energy_source": request.energy_source,
+37        "consideration": considertion,
+38    }
+39    return response
+
+
+
+
+
+

Demo App

+

For your convenience, we’ve built a demo app +that you can test and modify locally for multi-turn RAG scenarios.

+
+../_images/mutli-turn-example.png + +
+

Example multi-turn user conversation showing adjusting retrieval

+
+
+
+
+
+
+
+
+
+
+ + + + + + + \ No newline at end of file diff --git a/build_with_arch/rag.html b/build_with_arch/rag.html index b29d860f..60feff10 100755 --- a/build_with_arch/rag.html +++ b/build_with_arch/rag.html @@ -7,9 +7,9 @@ -RAG Application | Arch Docs v0.1.5 - - +RAG Apps | Arch Docs v0.1.7 + + @@ -18,8 +18,8 @@ - - + + + diff --git a/concepts/llm_provider.html b/concepts/llm_provider.html index d3e5baea..673facaf 100755 --- a/concepts/llm_provider.html +++ b/concepts/llm_provider.html @@ -7,9 +7,9 @@ -LLM Provider | Arch Docs v0.1.5 - - +LLM Provider | Arch Docs v0.1.7 + + @@ -39,7 +39,7 @@