diff --git a/e2e_tests/api_model_server.rest b/e2e_tests/api_model_server.rest index c5fa0850..74bda508 100644 --- a/e2e_tests/api_model_server.rest +++ b/e2e_tests/api_model_server.rest @@ -1,7 +1,8 @@ @model_server_endpoint = http://localhost:51000 @archfc_endpoint = https://api.fc.archgw.com -### talk to model_server for completion + +# talk to function calling endpoint POST {{model_server_endpoint}}/function_calling HTTP/1.1 Content-Type: application/json @@ -41,7 +42,6 @@ Content-Type: application/json } - # talk to Arch-Function directly for completion POST {{archfc_endpoint}}/v1/chat/completions HTTP/1.1 Content-Type: application/json @@ -59,3 +59,13 @@ Content-Type: application/json "continue_final_message": true, "add_generation_prompt": false } + + +# talk to guardrails endpoint +POST {{model_server_endpoint}}/guardrails HTTP/1.1 +Content-Type: application/json + +{ + "input": "how is the weather in seattle for next 10 days", + "task": "jailbreak" +} diff --git a/model_server/app/model_handler/guardrails.py b/model_server/app/model_handler/guardrails.py index f733552e..4f6eaf0e 100644 --- a/model_server/app/model_handler/guardrails.py +++ b/model_server/app/model_handler/guardrails.py @@ -105,7 +105,7 @@ class ArchGuardHanlder: sentence = None return GuardResponse( - prob=prob.item(), verdict=verdict, sentence=sentence, latency=latency + prob=[prob.item()], verdict=verdict, sentence=[sentence], latency=latency ) def predict(self, req: GuardRequest, max_num_words=300) -> GuardResponse: @@ -138,9 +138,9 @@ class ArchGuardHanlder: chunk_result = self._predict_text(req.task, chunk) if chunk_result.verdict: - prob.append(chunk_result.prob) + prob.append(chunk_result.prob[0]) verdict = True - sentence.append(chunk_result.sentence) + sentence.append(chunk_result.sentence[0]) latency += chunk_result.latency return GuardResponse(