mirror of
https://github.com/katanemo/plano.git
synced 2026-06-26 15:39:40 +02:00
ensure that we can call the new api.fc.archgw.com url, logging fixes … (#142)
* ensure that we can call the new api.fc.archgw.com url, logging fixes and minor cli bug fixes
* fixed a bug where model_server printed on terminal after start script stopped running
* updating the logo and fixing the website styles
* updated the branch with feedback from Co and Adil

---------

Co-authored-by: Salman Paracha <salmanparacha@MacBook-Pro-261.local>
This commit is contained in:
parent
82fc91495e
commit
3ed50e61d2
11 changed files with 70 additions and 47 deletions
|
|
@ -8,7 +8,7 @@ from app.load_models import (
|
|||
get_device,
|
||||
)
|
||||
import os
|
||||
from app.utils import GuardHandler, split_text_into_chunks, load_yaml_config
|
||||
from app.utils import GuardHandler, split_text_into_chunks, load_yaml_config, get_model_server_logger
|
||||
import torch
|
||||
import yaml
|
||||
import string
|
||||
|
|
@ -17,11 +17,10 @@ import logging
|
|||
from app.arch_fc.arch_fc import chat_completion as arch_fc_chat_completion, ChatMessage
|
||||
import os.path
|
||||
|
||||
logging.basicConfig(
|
||||
level=logging.INFO, format="%(asctime)s - %(levelname)s - %(message)s"
|
||||
)
|
||||
logger = logging.getLogger(__name__)
|
||||
logger.info("Device used: " + get_device())
|
||||
|
||||
logger = get_model_server_logger()
|
||||
logger.info(f"Devices Avialble: {get_device()}")
|
||||
|
||||
transformers = load_transformers()
|
||||
zero_shot_models = load_zero_shot_models()
|
||||
guard_model_config = load_yaml_config("guard_model_config.yaml")
|
||||
|
|
@ -40,7 +39,6 @@ guard_handler = GuardHandler(toxic_model=None, jailbreak_model=jailbreak_model)
|
|||
|
||||
app = FastAPI()
|
||||
|
||||
|
||||
class EmbeddingRequest(BaseModel):
|
||||
input: str
|
||||
model: str
|
||||
|
|
@ -63,12 +61,13 @@ async def models():
|
|||
|
||||
@app.post("/embeddings")
|
||||
async def embedding(req: EmbeddingRequest, res: Response):
|
||||
print(f"Embedding Call Start Time: {time.time()}")
|
||||
|
||||
if req.model not in transformers:
|
||||
raise HTTPException(status_code=400, detail="unknown model: " + req.model)
|
||||
|
||||
start = time.time()
|
||||
embeddings = transformers[req.model].encode([req.input])
|
||||
print(f"Embedding Call Complete Time: {time.time()-start}")
|
||||
logger.info(f"Embedding Call Complete Time: {time.time()-start}")
|
||||
data = []
|
||||
|
||||
for embedding in embeddings.tolist():
|
||||
|
|
@ -78,10 +77,8 @@ async def embedding(req: EmbeddingRequest, res: Response):
|
|||
"prompt_tokens": 0,
|
||||
"total_tokens": 0,
|
||||
}
|
||||
print(f"Embedding Call Complete Time: {time.time()}")
|
||||
return {"data": data, "model": req.model, "object": "list", "usage": usage}
|
||||
|
||||
|
||||
class GuardRequest(BaseModel):
|
||||
input: str
|
||||
task: str
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue