Update Arch-Guard and corresponding e2e test

This commit is contained in:
Shuguang Chen 2024-12-06 13:41:18 -08:00
parent 6dc92fbbc1
commit 2fd8a5a06d
2 changed files with 15 additions and 5 deletions

View file

@@ -1,7 +1,8 @@
@model_server_endpoint = http://localhost:51000
@archfc_endpoint = https://api.fc.archgw.com
### talk to model_server for completion
# talk to function calling endpoint
POST {{model_server_endpoint}}/function_calling HTTP/1.1
Content-Type: application/json
@@ -41,7 +42,6 @@ Content-Type: application/json
}
# talk to Arch-Function directly for completion
POST {{archfc_endpoint}}/v1/chat/completions HTTP/1.1
Content-Type: application/json
@@ -59,3 +59,13 @@ Content-Type: application/json
"continue_final_message": true,
"add_generation_prompt": false
}
# talk to guardrails endpoint
POST {{model_server_endpoint}}/guardrails HTTP/1.1
Content-Type: application/json
{
"input": "how is the weather in seattle for next 10 days",
"task": "jailbreak"
}

View file

@@ -105,7 +105,7 @@ class ArchGuardHanlder:
sentence = None
return GuardResponse(
prob=prob.item(), verdict=verdict, sentence=sentence, latency=latency
prob=[prob.item()], verdict=verdict, sentence=[sentence], latency=latency
)
def predict(self, req: GuardRequest, max_num_words=300) -> GuardResponse:
@@ -138,9 +138,9 @@ class ArchGuardHanlder:
chunk_result = self._predict_text(req.task, chunk)
if chunk_result.verdict:
prob.append(chunk_result.prob)
prob.append(chunk_result.prob[0])
verdict = True
sentence.append(chunk_result.sentence)
sentence.append(chunk_result.sentence[0])
latency += chunk_result.latency
return GuardResponse(