mirror of
https://github.com/katanemo/plano.git
synced 2026-05-11 00:32:42 +02:00
Add support for streaming and fix a few issues (see description) (#202)
This commit is contained in:
parent
29ff8da60f
commit
662a840ac5
45 changed files with 2266 additions and 477 deletions
|
|
@ -66,18 +66,18 @@ async def insurance_claim_details(req: InsuranceClaimDetailsRequest, res: Respon
|
|||
|
||||
|
||||
class DefaultTargetRequest(BaseModel):
|
||||
arch_messages: list
|
||||
messages: list
|
||||
|
||||
|
||||
@app.post("/default_target")
|
||||
async def default_target(req: DefaultTargetRequest, res: Response):
|
||||
logger.info(f"Received arch_messages: {req.arch_messages}")
|
||||
logger.info(f"Received arch_messages: {req.messages}")
|
||||
resp = {
|
||||
"choices": [
|
||||
{
|
||||
"message": {
|
||||
"role": "assistant",
|
||||
"content": "hello world from api server",
|
||||
"content": "I can help you with weather forecast or insurance claim details",
|
||||
},
|
||||
"finish_reason": "completed",
|
||||
"index": 0,
|
||||
|
|
|
|||
|
|
@ -16,12 +16,27 @@ overrides:
|
|||
prompt_target_intent_matching_threshold: 0.6
|
||||
|
||||
llm_providers:
|
||||
- name: gpt
|
||||
access_key: OPENAI_API_KEY
|
||||
- name: gpt-4o-mini
|
||||
access_key: $OPENAI_API_KEY
|
||||
provider: openai
|
||||
model: gpt-3.5-turbo
|
||||
model: gpt-4o-mini
|
||||
default: true
|
||||
|
||||
- name: gpt-3.5-turbo-0125
|
||||
access_key: $OPENAI_API_KEY
|
||||
provider: openai
|
||||
model: gpt-3.5-turbo-0125
|
||||
|
||||
- name: gpt-4o
|
||||
access_key: $OPENAI_API_KEY
|
||||
provider: openai
|
||||
model: gpt-4o
|
||||
|
||||
- name: ministral-3b
|
||||
access_key: $MISTRAL_API_KEY
|
||||
provider: mistral
|
||||
model: ministral-3b-latest
|
||||
|
||||
system_prompt: |
|
||||
You are a helpful assistant.
|
||||
|
||||
|
|
@ -67,10 +82,10 @@ prompt_targets:
|
|||
name: api_server
|
||||
path: /default_target
|
||||
system_prompt: |
|
||||
You are a helpful assistant. Use the information that is provided to you.
|
||||
You are a helpful assistant! Summarize the user's request and provide a helpful response.
|
||||
# if it is set to false arch will send response that it received from this prompt target to the user
|
||||
# if true arch will forward the response to the default LLM
|
||||
auto_llm_dispatch_on_response: true
|
||||
auto_llm_dispatch_on_response: false
|
||||
|
||||
tracing:
|
||||
random_sampling: 100
|
||||
|
|
|
|||
|
|
@ -13,11 +13,11 @@ services:
|
|||
chatbot_ui:
|
||||
build:
|
||||
context: ../../chatbot_ui
|
||||
dockerfile: Dockerfile
|
||||
ports:
|
||||
- "18080:8080"
|
||||
environment:
|
||||
- CHAT_COMPLETION_ENDPOINT=http://host.docker.internal:10000/v1 #this is only because we are running the sample app in the same docker container environment as archgw
|
||||
# this is only because we are running the sample app in the same docker container environment as archgw
|
||||
- CHAT_COMPLETION_ENDPOINT=http://host.docker.internal:10000/v1
|
||||
extra_hosts:
|
||||
- "host.docker.internal:host-gateway"
|
||||
volumes:
|
||||
|
|
@ -38,6 +38,8 @@ services:
|
|||
- "${PORT_UI:-55679}:55679"
|
||||
- "${PORT_GRPC:-4317}:4317"
|
||||
- "${PORT_HTTP:-4318}:4318"
|
||||
profiles:
|
||||
- monitoring
|
||||
|
||||
prometheus:
|
||||
image: prom/prometheus
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue