From 0d88de3beff2ddda7d146d00f546cc8e25010284 Mon Sep 17 00:00:00 2001 From: alpha-nerd-nomyo Date: Wed, 1 Apr 2026 13:11:15 +0200 Subject: [PATCH] docs: docstring updated to reflect server capabilities --- nomyo/nomyo.py | 47 +++++++++++++++++++++++++++++++++++++++-------- 1 file changed, 39 insertions(+), 8 deletions(-) diff --git a/nomyo/nomyo.py b/nomyo/nomyo.py index 3e92726..7b482d6 100644 --- a/nomyo/nomyo.py +++ b/nomyo/nomyo.py @@ -121,19 +121,28 @@ class SecureChatCompletion: Supported parameters include: - temperature: float (0-2) - max_tokens: int - - tools: List of tool definitions - - tool_choice: str ("auto", "none", or specific tool name) + - top_p: float - stop: Union[str, List[str]] - - presence_penalty: float - - frequency_penalty: float + - presence_penalty: float (-2.0 to 2.0) + - frequency_penalty: float (-2.0 to 2.0) + - n: int — number of completions to generate + - best_of: int — generate this many and return the best + - seed: int — for reproducible outputs - logit_bias: Dict[str, float] - user: str + - tools: List of tool definitions (passed through to llama.cpp) + - tool_choice: str ("auto", "none", or specific tool name) + - response_format: Dict — controls output format, e.g. + {"type": "json_object"} or + {"type": "json_schema", "json_schema": {...}} + - stream: bool — NOT supported for encrypted inference; the server + will reject this with HTTP 400. Always use stream=False (default). - base_url: str (alternative to initializing with router_url) - security_tier: str ("standard", "high", or "maximum") Controls hardware routing and security level: - * "standard": general secure inference - * "high": sensitive business data - * "maximum": maximum isolation (PHI, classified data) + * "standard": general secure inference (GPU) + * "high": sensitive business data (balanced CPU/GPU) + * "maximum": maximum isolation (PHI, classified data — CPU only) If not specified, server uses default based on model name mapping. 
Returns: @@ -149,7 +158,12 @@ class SecureChatCompletion: "message": { "role": str, "content": str, - "tool_calls": List[Dict] # if tools were used + "tool_calls": List[Dict], # present if tools were used + "reasoning_content": str # present for thinking models + # (e.g. Qwen3, DeepSeek-R1); + # contains the model's internal + # chain-of-thought, separate from + # the final answer in "content" }, "finish_reason": str } @@ -158,6 +172,23 @@ class SecureChatCompletion: "prompt_tokens": int, "completion_tokens": int, "total_tokens": int + }, + "_metadata": { + "payload_id": str, # echoes the X-Payload-ID sent with the request + "processed_at": int, # Unix timestamp of server-side processing + "is_encrypted": bool, # always True for this endpoint + "response_status": str, # "success" on success + "security_tier": str, # active tier: "standard", "high", or "maximum" + "memory_protection": { # server-side memory protection capabilities + "platform": str, # e.g. "linux", "windows", "darwin" + "memory_locking": bool, # whether mlock/VirtualLock succeeded + "secure_zeroing": bool, # whether memset-based zeroing is available + "core_dump_prevention": bool # whether core dumps are suppressed + }, + "cuda_device": { # privacy-safe GPU info (hashed identifiers) + "available": bool, + "device_hash": str # SHA-256 of device name — not the raw name + } } }