mirror of
https://github.com/katanemo/plano.git
synced 2026-06-20 15:28:07 +02:00
archgw cli (#117)
* initial commit of the insurance agent demo, with the CLI tool * committing the cli * fixed some field descriptions for generate-prompt-targets * CLI works with build, up and down commands. Function calling example works stand-alone * fixed README to install archgw cli * fixing based on feedback * fixing based on feedback --------- Co-authored-by: Salman Paracha <salmanparacha@MacBook-Pro-261.local>
This commit is contained in:
parent
af018e5fd8
commit
dc57f119a0
30 changed files with 1087 additions and 203 deletions
|
|
@ -36,7 +36,8 @@ if mode == "cloud":
|
|||
base_url=fc_url,
|
||||
api_key="EMPTY",
|
||||
)
|
||||
chosen_model = "fc-cloud"
|
||||
models = client.models.list()
|
||||
chosen_model = models.data[0].id
|
||||
endpoint = fc_url
|
||||
else:
|
||||
client = OpenAI(
|
||||
|
|
@ -50,7 +51,6 @@ logger.info(f"serving mode: {mode}")
|
|||
logger.info(f"using model: {chosen_model}")
|
||||
logger.info(f"using endpoint: {endpoint}")
|
||||
|
||||
|
||||
async def chat_completion(req: ChatMessage, res: Response):
|
||||
logger.info("starting request")
|
||||
tools_encoded = handler._format_system(req.tools)
|
||||
|
|
|
|||
|
|
@ -26,8 +26,7 @@ zero_shot_models = load_zero_shot_models()
|
|||
|
||||
with open("guard_model_config.yaml") as f:
|
||||
guard_model_config = yaml.safe_load(f)
|
||||
with open('/root/arch_config.yaml') as f:
|
||||
config = yaml.safe_load(f)
|
||||
|
||||
mode = os.getenv("MODE", "cloud")
|
||||
logger.info(f"Serving model mode: {mode}")
|
||||
if mode not in ['cloud', 'local-gpu', 'local-cpu']:
|
||||
|
|
@ -37,20 +36,11 @@ if mode == 'local-cpu':
|
|||
else:
|
||||
hardware = "gpu" if torch.cuda.is_available() else "cpu"
|
||||
|
||||
if "prompt_guards" in config.keys():
|
||||
task = list(config["prompt_guards"]["input_guards"].keys())[0]
|
||||
|
||||
hardware = "gpu" if torch.cuda.is_available() else "cpu"
|
||||
jailbreak_model = load_guard_model(
|
||||
guard_model_config["jailbreak"][hardware], hardware
|
||||
)
|
||||
toxic_model = None
|
||||
|
||||
guard_handler = GuardHandler(toxic_model=toxic_model, jailbreak_model=jailbreak_model)
|
||||
jailbreak_model = load_guard_model(guard_model_config["jailbreak"][hardware], hardware)
|
||||
guard_handler = GuardHandler(toxic_model=None, jailbreak_model=jailbreak_model)
|
||||
|
||||
app = FastAPI()
|
||||
|
||||
|
||||
class EmbeddingRequest(BaseModel):
|
||||
input: str
|
||||
model: str
|
||||
|
|
|
|||
|
|
@ -1,3 +1,3 @@
|
|||
jailbreak:
|
||||
cpu: "katanemolabs/jailbreak_ovn_4bit"
|
||||
gpu: "katanemolabs/Bolt-Guard-EEtq"
|
||||
cpu: "katanemolabs/Arch-Guard-cpu"
|
||||
gpu: "katanemolabs/Arch-Guard-gpu"
|
||||
|
|
|
|||
|
|
@ -1,7 +1,6 @@
|
|||
params:
|
||||
temperature: 0.01
|
||||
top_p : 0.5
|
||||
repetition_penalty: 1.0
|
||||
top_k: 50
|
||||
max_tokens: 512
|
||||
stop_token_ids: [151645, 151643]
|
||||
|
|
|
|||
|
|
@ -16,3 +16,4 @@ dateparser
|
|||
openai
|
||||
pandas
|
||||
tf-keras
|
||||
onnx
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue