add support for default target (#111)

* add support for default target

* add more fixes
This commit is contained in:
Adil Hafeez 2024-10-02 20:43:16 -07:00 committed by GitHub
parent c8d0dbec26
commit 1b57a49c9d
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
8 changed files with 215 additions and 88 deletions

View file

@ -9,6 +9,10 @@ import yaml
from openai import OpenAI
import os
logging.basicConfig(
level=logging.INFO, format="%(asctime)s - %(levelname)s - %(message)s"
)
logger = logging.getLogger(__name__)
with open("openai_params.yaml") as f:
params = yaml.safe_load(f)
@ -20,7 +24,6 @@ mode = os.getenv("MODE", "cloud")
if mode not in ["cloud", "local-gpu", "local-cpu"]:
raise ValueError(f"Invalid mode: {mode}")
arch_api_key = os.getenv("ARCH_API_KEY", "vllm")
logger = logging.getLogger("uvicorn.error")
handler = None
if ollama_model.startswith("Arch"):
@ -28,17 +31,12 @@ if ollama_model.startswith("Arch"):
else:
handler = BoltHandler()
# app = FastAPI()
if mode == "cloud":
client = OpenAI(
base_url=fc_url,
api_key="EMPTY",
)
models = client.models.list()
model = models.data[0].id
chosen_model = model
chosen_model = "fc-cloud"
endpoint = fc_url
else:
client = OpenAI(
@ -47,12 +45,12 @@ else:
)
chosen_model = ollama_model
endpoint = ollama_endpoint
logger.info(f"serving mode: {mode}")
logger.info(f"using model: {chosen_model}")
logger.info(f"using endpoint: {endpoint}")
async def chat_completion(req: ChatMessage, res: Response):
logger.info("starting request")
tools_encoded = handler._format_system(req.tools)