From 0d88de3beff2ddda7d146d00f546cc8e25010284 Mon Sep 17 00:00:00 2001 From: alpha-nerd-nomyo Date: Wed, 1 Apr 2026 13:11:15 +0200 Subject: [PATCH] docs: docstring updated to reflect server capabilities --- nomyo/nomyo.py | 47 +++++++++++++++++++++++++++++++++++++++-------- 1 file changed, 39 insertions(+), 8 deletions(-) diff --git a/nomyo/nomyo.py b/nomyo/nomyo.py index 3e92726..7b482d6 100644 --- a/nomyo/nomyo.py +++ b/nomyo/nomyo.py @@ -121,19 +121,28 @@ class SecureChatCompletion: Supported parameters include: - temperature: float (0-2) - max_tokens: int - - tools: List of tool definitions - - tool_choice: str ("auto", "none", or specific tool name) + - top_p: float - stop: Union[str, List[str]] - - presence_penalty: float - - frequency_penalty: float + - presence_penalty: float (-2.0 to 2.0) + - frequency_penalty: float (-2.0 to 2.0) + - n: int — number of completions to generate + - best_of: int — generate this many and return the best + - seed: int — for reproducible outputs - logit_bias: Dict[str, float] - user: str + - tools: List of tool definitions (passed through to llama.cpp) + - tool_choice: str ("auto", "none", or specific tool name) + - response_format: Dict — controls output format, e.g. + {"type": "json_object"} or + {"type": "json_schema", "json_schema": {...}} + - stream: bool — NOT supported for encrypted inference; the server + will reject this with HTTP 400. Always use stream=False (default). - base_url: str (alternative to initializing with router_url) - security_tier: str ("standard", "high", or "maximum") Controls hardware routing and security level: - * "standard": general secure inference - * "high": sensitive business data - * "maximum": maximum isolation (PHI, classified data) + * "standard": general secure inference (GPU) + * "high": sensitive business data (balanced CPU/GPU) + * "maximum": maximum isolation (PHI, classified data — CPU only) If not specified, server uses default based on model name mapping. 
Returns: @@ -149,7 +158,12 @@ class SecureChatCompletion: "message": { "role": str, "content": str, - "tool_calls": List[Dict] # if tools were used + "tool_calls": List[Dict], # present if tools were used + "reasoning_content": str # present for thinking models + # (e.g. Qwen3, DeepSeek-R1); + # contains the model's internal + # chain-of-thought, separate from + # the final answer in "content" }, "finish_reason": str } @@ -158,6 +172,23 @@ class SecureChatCompletion: "prompt_tokens": int, "completion_tokens": int, "total_tokens": int + }, + "_metadata": { + "payload_id": str, # echoes the X-Payload-ID sent with the request + "processed_at": int, # Unix timestamp of server-side processing + "is_encrypted": bool, # always True for this endpoint + "response_status": str, # "success" on success + "security_tier": str, # active tier: "standard", "high", or "maximum" + "memory_protection": { # server-side memory protection capabilities + "platform": str, # e.g. "linux", "windows", "darwin" + "memory_locking": bool, # whether mlock/VirtualLock succeeded + "secure_zeroing": bool, # whether memset-based zeroing is available + "core_dump_prevention": bool # whether core dumps are suppressed + }, + "cuda_device": { # privacy-safe GPU info (hashed identifiers) + "available": bool, + "device_hash": str # SHA-256 of device name — not the raw name + } } }