model server build (#127)

* first commit to have model_server not be dependent on Docker * making changes to fix the docker-compose file for archgw to set DNS_V4 and minor fixes with the build * additional fixes for model server to be separated out in the build * additional fixes for model server to be separated out in the build * fix to get model_server to be built as a separate python process. TODO: fix the embeddings logs after cli completes * fixing init to pull tempfile using the tempfile python package --------- Co-authored-by: Salman Paracha <salmanparacha@MacBook-Pro-261.local>
2026-06-20 15:28:07 +02:00 · 2024-10-06 18:21:43 -07:00 · 2024-10-06 18:21:43 -07:00 · b60ceb9168
commit b60ceb9168
parent 7d21359f5b
21 changed files with 3390 additions and 154 deletions
--- a/model_server/app/arch_fc/arch_fc.py
+++ b/model_server/app/arch_fc/arch_fc.py
@ -1,9 +1,10 @@
 import json
 import random
 from fastapi import FastAPI, Response
-from app.arch_fc.arch_handler import ArchHandler
-from app.arch_fc.bolt_handler import BoltHandler
-from app.arch_fc.common import ChatMessage, Message
+from .common import ChatMessage, Message
+from .arch_handler import ArchHandler
+from .bolt_handler import BoltHandler
+from app.utils import load_yaml_config
 import logging
 import yaml
 from openai import OpenAI
@ -14,17 +15,14 @@ logging.basicConfig(
    level=logging.INFO, format="%(asctime)s - %(levelname)s - %(message)s"
 )
 logger = logging.getLogger(__name__)
-
-with open("openai_params.yaml") as f:
-    params = yaml.safe_load(f)
-
+params = load_yaml_config("openai_params.yaml")
 ollama_endpoint = os.getenv("OLLAMA_ENDPOINT", "localhost")
 ollama_model = os.getenv("OLLAMA_MODEL", "Arch-Function-Calling-1.5B-Q4_K_M")
-fc_url = os.getenv("FC_URL", ollama_endpoint)
+fc_url = os.getenv("FC_URL", "https://arch-fc-free-trial-4mzywewe.uc.gateway.dev/v1")
+
 mode = os.getenv("MODE", "cloud")
 if mode not in ["cloud", "local-gpu", "local-cpu"]:
    raise ValueError(f"Invalid mode: {mode}")
-arch_api_key = os.getenv("ARCH_API_KEY", "vllm")

 handler = None
 if ollama_model.startswith("Arch"):