mirror of
https://github.com/katanemo/plano.git
synced 2026-04-25 00:36:34 +02:00
ensure that we can call the new api.fc.archgw.com url, logging fixes … (#142)
* ensure that we can call the new api.fc.archgw.com url, logging fixes and minor cli bug fixes * fixed a bug where model_server printed on terminal after start script stopped running * updating the logo and fixing the website styles * updated the branch with feedback from Co and Adil --------- Co-authored-by: Salman Paracha <salmanparacha@MacBook-Pro-261.local>
This commit is contained in:
parent
82fc91495e
commit
3ed50e61d2
11 changed files with 70 additions and 47 deletions
|
|
@ -1 +0,0 @@
|
|||
huggingface-cli download TheBloke/Mistral-7B-Instruct-v0.2-GGUF mistral-7b-instruct-v0.2.Q4_K_M.gguf --local-dir . --local-dir-use-symlinks False
|
||||
|
|
@ -90,6 +90,7 @@ def up(file, path):
|
|||
|
||||
# Set the ARCH_CONFIG_FILE environment variable
|
||||
env_stage = {}
|
||||
env = os.environ.copy()
|
||||
#check if access_keys are present in the config file
|
||||
access_keys = get_llm_provider_access_keys(arch_config_file=arch_config_file)
|
||||
if access_keys:
|
||||
|
|
@ -118,7 +119,6 @@ def up(file, path):
|
|||
for key, value in env_stage.items():
|
||||
file.write(f"{key}={value}\n")
|
||||
|
||||
env = os.environ.copy()
|
||||
env.update(env_stage)
|
||||
env['ARCH_CONFIG_FILE'] = arch_config_file
|
||||
|
||||
|
|
|
|||
|
|
@ -105,6 +105,7 @@ def start_arch_modelserver():
|
|||
subprocess.run(
|
||||
['archgw_modelserver', 'restart'],
|
||||
check=True,
|
||||
start_new_session=True
|
||||
)
|
||||
print("Successfull run the archgw model_server")
|
||||
except subprocess.CalledProcessError as e:
|
||||
|
|
|
|||
|
|
@ -21,6 +21,7 @@ def run_docker_compose_ps(compose_file, env):
|
|||
stdout=subprocess.PIPE,
|
||||
stderr=subprocess.PIPE,
|
||||
text=True,
|
||||
start_new_session=True,
|
||||
env=env
|
||||
)
|
||||
# Capture the output of `docker-compose ps`
|
||||
|
|
|
|||
Binary file not shown.
|
Before Width: | Height: | Size: 311 KiB After Width: | Height: | Size: 335 KiB |
|
|
@ -38,6 +38,9 @@ def start_server():
|
|||
print(f"Starting Archgw Model Server")
|
||||
process = subprocess.Popen(
|
||||
["uvicorn", "app.main:app", "--host", "0.0.0.0", "--port", "51000"],
|
||||
start_new_session=True,
|
||||
stdout=subprocess.DEVNULL, # Suppress standard output. There is a logger that model_server prints to
|
||||
stderr=subprocess.DEVNULL, # Suppress standard error. There is a logger that model_server prints to
|
||||
)
|
||||
|
||||
if wait_for_health_check("http://0.0.0.0:51000/healthz"):
|
||||
|
|
|
|||
|
|
@ -4,21 +4,17 @@ from fastapi import FastAPI, Response
|
|||
from .common import ChatMessage, Message
|
||||
from .arch_handler import ArchHandler
|
||||
from .bolt_handler import BoltHandler
|
||||
from app.utils import load_yaml_config
|
||||
import logging
|
||||
import yaml
|
||||
from app.utils import load_yaml_config, get_model_server_logger
|
||||
from openai import OpenAI
|
||||
import os
|
||||
import hashlib
|
||||
|
||||
logging.basicConfig(
|
||||
level=logging.INFO, format="%(asctime)s - %(levelname)s - %(message)s"
|
||||
)
|
||||
logger = logging.getLogger(__name__)
|
||||
logger = get_model_server_logger()
|
||||
|
||||
params = load_yaml_config("openai_params.yaml")
|
||||
ollama_endpoint = os.getenv("OLLAMA_ENDPOINT", "localhost")
|
||||
ollama_model = os.getenv("OLLAMA_MODEL", "Arch-Function-Calling-1.5B-Q4_K_M")
|
||||
fc_url = os.getenv("FC_URL", "https://arch-fc-free-trial-4mzywewe.uc.gateway.dev/v1")
|
||||
fc_url = os.getenv("FC_URL", "https://api.fc.archgw.com/v1")
|
||||
|
||||
mode = os.getenv("MODE", "cloud")
|
||||
if mode not in ["cloud", "local-gpu", "local-cpu"]:
|
||||
|
|
@ -52,7 +48,7 @@ logger.info(f"using endpoint: {endpoint}")
|
|||
|
||||
|
||||
def process_state(arch_state, history: list[Message]):
|
||||
print("state: {}".format(arch_state))
|
||||
logger.info("state: {}".format(arch_state))
|
||||
state_json = json.loads(arch_state)
|
||||
|
||||
state_map = {}
|
||||
|
|
@ -61,7 +57,7 @@ def process_state(arch_state, history: list[Message]):
|
|||
for tool_state in tools_state:
|
||||
state_map[tool_state["key"]] = tool_state
|
||||
|
||||
print(f"state_map: {json.dumps(state_map)}")
|
||||
logger.info(f"state_map: {json.dumps(state_map)}")
|
||||
|
||||
sha_history = []
|
||||
updated_history = []
|
||||
|
|
@ -73,7 +69,7 @@ def process_state(arch_state, history: list[Message]):
|
|||
joined_key_str = ("#.#").join(sha_history)
|
||||
sha256_hash.update(joined_key_str.encode())
|
||||
sha_key = sha256_hash.hexdigest()
|
||||
print(f"sha_key: {sha_key}")
|
||||
logger.info(f"sha_key: {sha_key}")
|
||||
if sha_key in state_map:
|
||||
tool_call_state = state_map[sha_key]
|
||||
if "tool_call" in tool_call_state:
|
||||
|
|
|
|||
|
|
@ -1,13 +0,0 @@
|
|||
from load_models import (
|
||||
load_transformers,
|
||||
load_ner_models,
|
||||
load_toxic_model,
|
||||
load_jailbreak_model,
|
||||
)
|
||||
|
||||
print("installing transformers")
|
||||
load_transformers()
|
||||
print("installing ner models")
|
||||
load_ner_models()
|
||||
print("installing jailbreak models")
|
||||
load_jailbreak_model()
|
||||
|
|
@ -8,7 +8,7 @@ from app.load_models import (
|
|||
get_device,
|
||||
)
|
||||
import os
|
||||
from app.utils import GuardHandler, split_text_into_chunks, load_yaml_config
|
||||
from app.utils import GuardHandler, split_text_into_chunks, load_yaml_config, get_model_server_logger
|
||||
import torch
|
||||
import yaml
|
||||
import string
|
||||
|
|
@ -17,11 +17,10 @@ import logging
|
|||
from app.arch_fc.arch_fc import chat_completion as arch_fc_chat_completion, ChatMessage
|
||||
import os.path
|
||||
|
||||
logging.basicConfig(
|
||||
level=logging.INFO, format="%(asctime)s - %(levelname)s - %(message)s"
|
||||
)
|
||||
logger = logging.getLogger(__name__)
|
||||
logger.info("Device used: " + get_device())
|
||||
|
||||
logger = get_model_server_logger()
|
||||
logger.info(f"Devices Avialble: {get_device()}")
|
||||
|
||||
transformers = load_transformers()
|
||||
zero_shot_models = load_zero_shot_models()
|
||||
guard_model_config = load_yaml_config("guard_model_config.yaml")
|
||||
|
|
@ -40,7 +39,6 @@ guard_handler = GuardHandler(toxic_model=None, jailbreak_model=jailbreak_model)
|
|||
|
||||
app = FastAPI()
|
||||
|
||||
|
||||
class EmbeddingRequest(BaseModel):
|
||||
input: str
|
||||
model: str
|
||||
|
|
@ -63,12 +61,13 @@ async def models():
|
|||
|
||||
@app.post("/embeddings")
|
||||
async def embedding(req: EmbeddingRequest, res: Response):
|
||||
print(f"Embedding Call Start Time: {time.time()}")
|
||||
|
||||
if req.model not in transformers:
|
||||
raise HTTPException(status_code=400, detail="unknown model: " + req.model)
|
||||
|
||||
start = time.time()
|
||||
embeddings = transformers[req.model].encode([req.input])
|
||||
print(f"Embedding Call Complete Time: {time.time()-start}")
|
||||
logger.info(f"Embedding Call Complete Time: {time.time()-start}")
|
||||
data = []
|
||||
|
||||
for embedding in embeddings.tolist():
|
||||
|
|
@ -78,10 +77,8 @@ async def embedding(req: EmbeddingRequest, res: Response):
|
|||
"prompt_tokens": 0,
|
||||
"total_tokens": 0,
|
||||
}
|
||||
print(f"Embedding Call Complete Time: {time.time()}")
|
||||
return {"data": data, "model": req.model, "object": "list", "usage": usage}
|
||||
|
||||
|
||||
class GuardRequest(BaseModel):
|
||||
input: str
|
||||
task: str
|
||||
|
|
|
|||
|
|
@ -4,7 +4,10 @@ import time
|
|||
import torch
|
||||
import pkg_resources
|
||||
import yaml
|
||||
import os
|
||||
import logging
|
||||
|
||||
logger_instance = None
|
||||
|
||||
def load_yaml_config(file_name):
|
||||
# Load the YAML file from the package
|
||||
|
|
@ -134,3 +137,40 @@ class GuardHandler:
|
|||
f"{self.task}_sentence": sentence,
|
||||
}
|
||||
return result_dict
|
||||
|
||||
def get_model_server_logger():
    """Return the shared model server logger, configuring logging on first use.

    The first call makes sure ``~/archgw_logs`` exists and is writable, then
    calls ``logging.basicConfig`` with a single file handler pointing at
    ``~/archgw_logs/modelserver.log``. The handler is opened in ``'w'`` mode,
    so an existing log file is truncated. Subsequent calls return the cached
    logger and do not touch the logging configuration again.

    Returns:
        logging.Logger: the logger named ``"model_server_logger"``.

    Raises:
        RuntimeError: if the log directory cannot be created or is not
            writable, or if the log file cannot be opened. The underlying
            ``OSError`` is attached as ``__cause__`` so the real reason is
            not lost.
    """
    global logger_instance

    # Already initialized: hand back the cached instance.
    if logger_instance is not None:
        return logger_instance

    # Logs live outside the current working directory, under the user's home.
    log_dir = os.path.expanduser("~/archgw_logs")
    log_file_path = os.path.join(log_dir, "modelserver.log")

    try:
        # exist_ok=True already covers the "directory is there" case, so no
        # separate os.path.exists() check is needed.
        os.makedirs(log_dir, exist_ok=True)

        if not os.access(log_dir, os.W_OK):
            raise PermissionError(f"No write permission for the directory: {log_dir}")

        # File-only logging: no console handler is installed on purpose.
        # NOTE: basicConfig leaves the root logger untouched if it already has
        # handlers, so this only takes effect when logging is not yet set up.
        logging.basicConfig(
            level=logging.INFO,
            format="%(asctime)s - %(levelname)s - %(message)s",
            handlers=[
                logging.FileHandler(log_file_path, mode="w"),  # Overwrite logs in file
            ],
        )
    except OSError as e:
        # PermissionError is a subclass of OSError. Do not fall back to console
        # logging; surface the actual failure and keep the cause chained.
        raise RuntimeError(
            f"Unable to set up model server logging in {log_dir}: {e}"
        ) from e

    # Cache the logger only after the handlers were configured successfully.
    logger_instance = logging.getLogger("model_server_logger")
    return logger_instance
|
||||
|
|
|
|||
|
|
@ -57,11 +57,10 @@
|
|||
height: auto;
|
||||
display: block;
|
||||
}
|
||||
h2.bold-text {
|
||||
font-weight: bold;
|
||||
div.bold-text {
|
||||
font-size: 1.5rem;
|
||||
margin-bottom: 10px;
|
||||
line-height: 2rem;
|
||||
margin-bottom: 5px;
|
||||
line-height: 3rem;
|
||||
}
|
||||
.subheading {
|
||||
font-size: 1rem;
|
||||
|
|
@ -173,8 +172,8 @@
|
|||
</head>
|
||||
<body>
|
||||
<header>
|
||||
<a href="https://github.com/">GitHub</a>
|
||||
<a href="https://katanemo.github.io/arch/">Docs</a>
|
||||
<a href="https://github.com/katanemo/arch">GitHub</a>
|
||||
<a href="https://docs.archgw.com">Docs</a>
|
||||
<a href="https://discord.gg/rbjqVbpa">Discord</a>
|
||||
<a href="https://github.com/katanemo/arch?tab=readme-ov-file#contact">Contact</a>
|
||||
</header>
|
||||
|
|
@ -182,11 +181,11 @@
|
|||
<div class="image-placeholder">
|
||||
<img src="https://storage.googleapis.com/arch-website-assets/arch-logo.png" alt="Arch Gateway Logo" title="Arch Gateway Logo">
|
||||
</div>
|
||||
<h2 class="bold-text">Build fast, robust, and personalized GenAI apps</h2>
|
||||
<div class="subheading">An OSS intelligent gateway engineered with (fast) LLMs to handle the <b>muck</b> of prompts.</div>
|
||||
<div class="bold-text">Build <strong>fast</strong>, <strong>robust</strong>, and <strong>personalized</strong> GenAI apps</div>
|
||||
<div class="subheading">An <a href="https://github.com/katanemo/arch">open source</a> gateway engineered with (fast) LLMs to handle the <b>muck</b> of prompts.</div>
|
||||
<div class="buttons">
|
||||
<a href="https://github.com/katanemo/arch?tab=readme-ov-file#getstarted">Download</a>
|
||||
<a href="https://katanemo.github.io/arch/">Documentation</a>
|
||||
<a href="https://docs.archgw.com">Documentation</a>
|
||||
</div>
|
||||
<hr>
|
||||
<div class="why_arch">
|
||||
|
|
@ -250,7 +249,7 @@
|
|||
<h2 class="get-started">Let's get started </h2>
|
||||
<div class="buttons">
|
||||
<a href="https://github.com/katanemo/arch?tab=readme-ov-file#getstarted">Download</a>
|
||||
<a href="https://katanemo.github.io/arch/">Documentation</a>
|
||||
<a href="https://docs.archgw.com">Documentation</a>
|
||||
</div>
|
||||
</body>
|
||||
</html>
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue