mirror of
https://github.com/katanemo/plano.git
synced 2026-05-24 14:05:14 +02:00
formatting and monitoring change (#136)
This commit is contained in:
parent
976b2eaae0
commit
93abe553e3
8 changed files with 83 additions and 47 deletions
|
|
@ -5,7 +5,7 @@ from app.load_models import (
|
|||
load_transformers,
|
||||
load_guard_model,
|
||||
load_zero_shot_models,
|
||||
get_device
|
||||
get_device,
|
||||
)
|
||||
import os
|
||||
from app.utils import GuardHandler, split_text_into_chunks, load_yaml_config
|
||||
|
|
@ -21,17 +21,17 @@ logging.basicConfig(
|
|||
level=logging.INFO, format="%(asctime)s - %(levelname)s - %(message)s"
|
||||
)
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
logger.info("Device used: " + get_device())
|
||||
transformers = load_transformers()
|
||||
zero_shot_models = load_zero_shot_models()
|
||||
guard_model_config = load_yaml_config("guard_model_config.yaml")
|
||||
|
||||
mode = os.getenv("MODE", "cloud")
|
||||
logger.info(f"Serving model mode: {mode}")
|
||||
if mode not in ['cloud', 'local-gpu', 'local-cpu']:
|
||||
if mode not in ["cloud", "local-gpu", "local-cpu"]:
|
||||
raise ValueError(f"Invalid mode: {mode}")
|
||||
if mode == 'local-cpu':
|
||||
hardware = 'cpu'
|
||||
if mode == "local-cpu":
|
||||
hardware = "cpu"
|
||||
else:
|
||||
hardware = "gpu" if torch.cuda.is_available() else "cpu"
|
||||
|
||||
|
|
@ -40,6 +40,7 @@ guard_handler = GuardHandler(toxic_model=None, jailbreak_model=jailbreak_model)
|
|||
|
||||
app = FastAPI()
|
||||
|
||||
|
||||
class EmbeddingRequest(BaseModel):
|
||||
input: str
|
||||
model: str
|
||||
|
|
@ -49,6 +50,7 @@ class EmbeddingRequest(BaseModel):
|
|||
async def healthz():
|
||||
return {"status": "ok"}
|
||||
|
||||
|
||||
@app.get("/models")
|
||||
async def models():
|
||||
models = []
|
||||
|
|
@ -61,12 +63,11 @@ async def models():
|
|||
|
||||
@app.post("/embeddings")
|
||||
async def embedding(req: EmbeddingRequest, res: Response):
|
||||
print(f"Embedding Call Start Time: {time.time()}")
|
||||
if req.model not in transformers:
|
||||
raise HTTPException(status_code=400, detail="unknown model: " + req.model)
|
||||
|
||||
start = time.time()
|
||||
embeddings = transformers[req.model].encode([req.input])
|
||||
|
||||
print(f"Embedding Call Complete Time: {time.time()-start}")
|
||||
data = []
|
||||
|
||||
for embedding in embeddings.tolist():
|
||||
|
|
@ -76,7 +77,7 @@ async def embedding(req: EmbeddingRequest, res: Response):
|
|||
"prompt_tokens": 0,
|
||||
"total_tokens": 0,
|
||||
}
|
||||
print(f"Embedding Call Complete Time: {time.time()}")
|
||||
|
||||
return {"data": data, "model": req.model, "object": "list", "usage": usage}
|
||||
|
||||
|
||||
|
|
@ -197,10 +198,10 @@ class HallucinationRequest(BaseModel):
|
|||
@app.post("/hallucination")
|
||||
async def hallucination(req: HallucinationRequest, res: Response):
|
||||
"""
|
||||
Hallucination API, take input as text and return the prediction of hallucination for each parameter
|
||||
parameters: dictionary of parameters and values
|
||||
example {"name": "John", "age": "25"}
|
||||
prompt: input prompt from the user
|
||||
Hallucination API, take input as text and return the prediction of hallucination for each parameter
|
||||
parameters: dictionary of parameters and values
|
||||
example {"name": "John", "age": "25"}
|
||||
prompt: input prompt from the user
|
||||
"""
|
||||
if req.model not in zero_shot_models:
|
||||
raise HTTPException(status_code=400, detail="unknown model: " + req.model)
|
||||
|
|
@ -209,9 +210,12 @@ async def hallucination(req: HallucinationRequest, res: Response):
|
|||
candidate_labels = [f"{k} is {v}" for k, v in req.parameters.items()]
|
||||
hypothesis_template = "{}"
|
||||
result = classifier(
|
||||
req.prompt, candidate_labels=candidate_labels, hypothesis_template=hypothesis_template, multi_label=True
|
||||
req.prompt,
|
||||
candidate_labels=candidate_labels,
|
||||
hypothesis_template=hypothesis_template,
|
||||
multi_label=True,
|
||||
)
|
||||
result_score = result['scores']
|
||||
result_score = result["scores"]
|
||||
result_params = {k[0]: s for k, s in zip(req.parameters.items(), result_score)}
|
||||
|
||||
return {
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue