mirror of
https://github.com/katanemo/plano.git
synced 2026-06-23 15:38:07 +02:00
add retry
This commit is contained in:
parent
71658ddbd9
commit
a3f93de85d
7 changed files with 29 additions and 9 deletions
|
|
@ -44,7 +44,7 @@ listeners:
|
|||
- access_key: $OPENAI_API_KEY
|
||||
model: openai/gpt-4o-mini
|
||||
address: 0.0.0.0
|
||||
port: 9000
|
||||
port: 12000
|
||||
|
||||
tracing:
|
||||
random_sampling: 100
|
||||
|
|
|
|||
|
|
@ -6,3 +6,12 @@ services:
|
|||
- "16686:16686"
|
||||
- "4317:4317"
|
||||
- "4318:4318"
|
||||
open-web-ui:
|
||||
image: ghcr.io/open-webui/open-webui:main
|
||||
restart: always
|
||||
ports:
|
||||
- "8080:8080"
|
||||
environment:
|
||||
- DEFAULT_MODEL=gpt-4o-mini
|
||||
- ENABLE_OPENAI_API=true
|
||||
- OPENAI_API_BASE_URL=http://host.docker.internal:8001/v1
|
||||
|
|
|
|||
|
|
@ -18,7 +18,7 @@ logger = logging.getLogger(__name__)
|
|||
|
||||
|
||||
# Configuration for archgw LLM gateway
|
||||
LLM_GATEWAY_ENDPOINT = os.getenv("LLM_GATEWAY_ENDPOINT", "http://localhost:9000/v1")
|
||||
LLM_GATEWAY_ENDPOINT = os.getenv("LLM_GATEWAY_ENDPOINT", "http://localhost:12000/v1")
|
||||
RAG_MODEL = "gpt-4o-mini"
|
||||
|
||||
# Initialize OpenAI client for archgw
|
||||
|
|
@ -91,7 +91,7 @@ async def find_relevant_passages(
|
|||
logger.info(f"Calling archgw to find relevant passages for query: '{query}'")
|
||||
|
||||
# Prepare extra headers if traceparent is provided
|
||||
extra_headers = {}
|
||||
extra_headers = {"x-envoy-max-retries": "3"}
|
||||
if traceparent:
|
||||
extra_headers["traceparent"] = traceparent
|
||||
|
||||
|
|
|
|||
|
|
@ -16,7 +16,7 @@ logger = logging.getLogger(__name__)
|
|||
|
||||
|
||||
# Configuration for archgw LLM gateway
|
||||
LLM_GATEWAY_ENDPOINT = os.getenv("LLM_GATEWAY_ENDPOINT", "http://localhost:9000/v1")
|
||||
LLM_GATEWAY_ENDPOINT = os.getenv("LLM_GATEWAY_ENDPOINT", "http://localhost:12000/v1")
|
||||
QUERY_REWRITE_MODEL = "gpt-4o-mini"
|
||||
|
||||
# Initialize OpenAI client for archgw
|
||||
|
|
@ -50,7 +50,7 @@ async def rewrite_query_with_archgw(
|
|||
|
||||
try:
|
||||
# Call archgw using OpenAI client
|
||||
extra_headers = {}
|
||||
extra_headers = {"x-envoy-max-retries": "3"}
|
||||
if traceparent_header:
|
||||
extra_headers["traceparent"] = traceparent_header
|
||||
logger.info(f"Calling archgw at {LLM_GATEWAY_ENDPOINT} to rewrite query")
|
||||
|
|
|
|||
|
|
@ -20,7 +20,7 @@ logging.basicConfig(level=logging.INFO)
|
|||
logger = logging.getLogger(__name__)
|
||||
|
||||
# Configuration for archgw LLM gateway
|
||||
LLM_GATEWAY_ENDPOINT = os.getenv("LLM_GATEWAY_ENDPOINT", "http://localhost:9000/v1")
|
||||
LLM_GATEWAY_ENDPOINT = os.getenv("LLM_GATEWAY_ENDPOINT", "http://localhost:12000/v1")
|
||||
RESPONSE_MODEL = "gpt-4o"
|
||||
|
||||
# System prompt for response generation
|
||||
|
|
@ -94,7 +94,7 @@ async def stream_chat_completions(
|
|||
)
|
||||
|
||||
# Prepare extra headers if traceparent is provided
|
||||
extra_headers = {}
|
||||
extra_headers = {"x-envoy-max-retries": "3"}
|
||||
if traceparent_header:
|
||||
extra_headers["traceparent"] = traceparent_header
|
||||
|
||||
|
|
@ -191,7 +191,7 @@ async def non_streaming_chat_completions(
|
|||
logger.info(f"Calling archgw at {LLM_GATEWAY_ENDPOINT} to generate response")
|
||||
|
||||
# Prepare extra headers if traceparent is provided
|
||||
extra_headers = {}
|
||||
extra_headers = {"x-envoy-max-retries": "3"}
|
||||
if traceparent_header:
|
||||
extra_headers["traceparent"] = traceparent_header
|
||||
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue