diff --git a/model_server/app/main.py b/model_server/app/main.py index f3ea2496..95be9578 100644 --- a/model_server/app/main.py +++ b/model_server/app/main.py @@ -59,6 +59,7 @@ async def models(): @app.post("/embeddings") async def embedding(req: EmbeddingRequest, res: Response): + logger.info(f"Embedding req: {req}") if req.model != transformers["model_name"]: raise HTTPException(status_code=400, detail="unknown model: " + req.model) @@ -70,7 +71,7 @@ async def embedding(req: EmbeddingRequest, res: Response): embeddings = embeddings[0][:, 0] # normalize embeddings embeddings = torch.nn.functional.normalize(embeddings, p=2, dim=1).detach().numpy() - print(f"Embedding Call Complete Time: {time.time()-start}") + logger.info(f"Embedding Call Complete Time: {time.time()-start}") data = [] for embedding in embeddings.tolist(): diff --git a/model_server/pyproject.toml b/model_server/pyproject.toml index 08f6f8d8..0579779c 100644 --- a/model_server/pyproject.toml +++ b/model_server/pyproject.toml @@ -22,12 +22,13 @@ pyyaml = "6.0.2" accelerate = "*" psutil = "6.0.0" optimum-intel = "*" -openvino = "*" +openvino = "2024.4.0" pandas = "*" dateparser = "*" openai = "1.50.2" tf-keras = "*" -onnx = "*" +onnx = "1.17.0" +onnxruntime = "1.19.2" [tool.poetry.scripts] archgw_modelserver = "app:run_server" diff --git a/model_server/requirements.txt b/model_server/requirements.txt index ad17cff7..c84dc20c 100644 --- a/model_server/requirements.txt +++ b/model_server/requirements.txt @@ -16,4 +16,5 @@ openai==1.50.2 pandas tf-keras onnx==1.17.0 +onnxruntime==1.19.2 pytest