mirror of
https://github.com/katanemo/plano.git
synced 2026-06-17 15:25:17 +02:00
use archfc hosted on aws
This commit is contained in:
parent
28ee14a75c
commit
110ec479cb
6 changed files with 14 additions and 13 deletions
|
|
@ -129,18 +129,18 @@ def stop_server():
|
|||
|
||||
pid_file = get_pid_file()
|
||||
if os.path.exists(pid_file):
|
||||
logger.info("PID file found, shutting down the server.")
|
||||
logger.info("pid file found, shutting down the server.")
|
||||
# read pid from file
|
||||
with open(pid_file, "r") as f:
|
||||
pid = int(f.read())
|
||||
logger.info(f"Killing model server {pid}")
|
||||
logger.info(f"killing model server {pid}")
|
||||
try:
|
||||
os.kill(pid, signal.SIGKILL)
|
||||
except ProcessLookupError:
|
||||
logger.info(f"Process {pid} not found")
|
||||
logger.info(f"process {pid} not found")
|
||||
os.remove(pid_file)
|
||||
else:
|
||||
logger.info("No PID file found, server is not running.")
|
||||
logger.info("no pid file found, server is not running.")
|
||||
|
||||
|
||||
def restart_server(port=51000, foreground=False):
|
||||
|
|
|
|||
|
|
@ -15,7 +15,7 @@ logger = get_model_server_logger()
|
|||
|
||||
|
||||
# Define the client
|
||||
ARCH_ENDPOINT = os.getenv("ARCH_ENDPOINT", "https://api.fc.archgw.com/v1")
|
||||
ARCH_ENDPOINT = os.getenv("ARCH_ENDPOINT", "https://archfc.katanemo.dev/v1")
|
||||
ARCH_API_KEY = "EMPTY"
|
||||
ARCH_CLIENT = OpenAI(base_url=ARCH_ENDPOINT, api_key=ARCH_API_KEY)
|
||||
|
||||
|
|
|
|||
|
|
@ -4,7 +4,7 @@ import time
|
|||
import logging
|
||||
import src.commons.utils as utils
|
||||
|
||||
from src.commons.globals import handler_map
|
||||
from src.commons.globals import ARCH_ENDPOINT, handler_map
|
||||
from src.core.utils.model_utils import (
|
||||
ChatMessage,
|
||||
ChatCompletionResponse,
|
||||
|
|
@ -51,6 +51,8 @@ logging.getLogger("opentelemetry.exporter.otlp.proto.grpc.exporter").setLevel(
|
|||
app = FastAPI()
|
||||
FastAPIInstrumentor().instrument_app(app)
|
||||
|
||||
logger.info(f"using archfc endpoint: {ARCH_ENDPOINT}")
|
||||
|
||||
|
||||
@app.get("/healthz")
|
||||
async def healthz():
|
||||
|
|
|
|||
|
|
@ -1,6 +1,5 @@
|
|||
@model_server_endpoint = http://localhost:51000
|
||||
@archfc_endpoint = https://api.fc.archgw.com
|
||||
|
||||
@archfc_endpoint = https://archfc.katanemo.dev
|
||||
|
||||
### talk to function calling endpoint
|
||||
POST {{model_server_endpoint}}/function_calling HTTP/1.1
|
||||
|
|
@ -119,7 +118,7 @@ Content-Type: application/json
|
|||
}
|
||||
|
||||
### talk to Arch-Intent directly for completion
|
||||
POST {{archfc_endpoint}}/v1/chat/completions HTTP/1.1
|
||||
POST {{archfc_endpoint}}/v1/chat/completions HTTP/1.1
|
||||
Content-Type: application/json
|
||||
|
||||
{
|
||||
|
|
|
|||
|
|
@ -1,5 +1,5 @@
|
|||
@model_server_endpoint = http://localhost:51000
|
||||
@archfc_endpoint = https://api.fc.archgw.com
|
||||
@archfc_endpoint = https://archfc.katanemo.dev
|
||||
|
||||
### multi turn conversation with intent, except parameter gathering
|
||||
|
||||
|
|
@ -55,7 +55,7 @@ Content-Type: application/json
|
|||
]
|
||||
}
|
||||
### talk to Arch-Intent directly for completion
|
||||
POST https://api.fc.archgw.com/v1/chat/completions HTTP/1.1
|
||||
POST https://archfc.katanemo.dev/v1/chat/completions HTTP/1.1
|
||||
Content-Type: application/json
|
||||
|
||||
{
|
||||
|
|
@ -126,7 +126,7 @@ Content-Type: application/json
|
|||
]
|
||||
}
|
||||
### talk to Arch-Intent directly for completion, expect No
|
||||
POST https://api.fc.archgw.com/v1/chat/completions HTTP/1.1
|
||||
POST https://archfc.katanemo.dev/v1/chat/completions HTTP/1.1
|
||||
Content-Type: application/json
|
||||
|
||||
{
|
||||
|
|
|
|||
|
|
@ -1,5 +1,5 @@
|
|||
@model_server_endpoint = http://localhost:51000
|
||||
@archfc_endpoint = https://api.fc.archgw.com
|
||||
@archfc_endpoint = https://archfc.katanemo.dev
|
||||
|
||||
### single turn function calling all parameters insurance agent summary
|
||||
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue