mirror of
https://github.com/katanemo/plano.git
synced 2026-06-20 15:28:07 +02:00
model server build (#127)
* First commit to make model_server not dependent on Docker
* Changes to fix the docker-compose file for archgw to set DNS_V4, plus minor fixes to the build
* Additional fixes for model server to be separated out in the build
* Additional fixes for model server to be separated out in the build
* Fix to get model_server built as a separate Python process. TODO: fix the embeddings logs after the CLI completes
* Fix init to pull tempfile using the Python tempfile package

---------

Co-authored-by: Salman Paracha <salmanparacha@MacBook-Pro-261.local>
This commit is contained in:
parent
7d21359f5b
commit
b60ceb9168
21 changed files with 3390 additions and 154 deletions
|
|
@@ -1,9 +1,10 @@
|
|||
import json
|
||||
import random
|
||||
from fastapi import FastAPI, Response
|
||||
from app.arch_fc.arch_handler import ArchHandler
|
||||
from app.arch_fc.bolt_handler import BoltHandler
|
||||
from app.arch_fc.common import ChatMessage, Message
|
||||
from .common import ChatMessage, Message
|
||||
from .arch_handler import ArchHandler
|
||||
from .bolt_handler import BoltHandler
|
||||
from app.utils import load_yaml_config
|
||||
import logging
|
||||
import yaml
|
||||
from openai import OpenAI
|
||||
|
|
@@ -14,17 +15,14 @@ logging.basicConfig(
|
|||
level=logging.INFO, format="%(asctime)s - %(levelname)s - %(message)s"
|
||||
)
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
with open("openai_params.yaml") as f:
|
||||
params = yaml.safe_load(f)
|
||||
|
||||
params = load_yaml_config("openai_params.yaml")
|
||||
ollama_endpoint = os.getenv("OLLAMA_ENDPOINT", "localhost")
|
||||
ollama_model = os.getenv("OLLAMA_MODEL", "Arch-Function-Calling-1.5B-Q4_K_M")
|
||||
fc_url = os.getenv("FC_URL", ollama_endpoint)
|
||||
fc_url = os.getenv("FC_URL", "https://arch-fc-free-trial-4mzywewe.uc.gateway.dev/v1")
|
||||
|
||||
mode = os.getenv("MODE", "cloud")
|
||||
if mode not in ["cloud", "local-gpu", "local-cpu"]:
|
||||
raise ValueError(f"Invalid mode: {mode}")
|
||||
arch_api_key = os.getenv("ARCH_API_KEY", "vllm")
|
||||
|
||||
handler = None
|
||||
if ollama_model.startswith("Arch"):
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue