docs: docstring updated to reflect server capabilities
This commit is contained in:
parent
c466c49c14
commit
0d88de3bef
1 changed file with 39 additions and 8 deletions
|
|
@ -121,19 +121,28 @@ class SecureChatCompletion:
|
|||
Supported parameters include:
|
||||
- temperature: float (0-2)
|
||||
- max_tokens: int
|
||||
- tools: List of tool definitions
|
||||
- tool_choice: str ("auto", "none", or specific tool name)
|
||||
- top_p: float
|
||||
- stop: Union[str, List[str]]
|
||||
- presence_penalty: float
|
||||
- frequency_penalty: float
|
||||
- presence_penalty: float (-2.0 to 2.0)
|
||||
- frequency_penalty: float (-2.0 to 2.0)
|
||||
- n: int — number of completions to generate
|
||||
- best_of: int — generate this many and return the best
|
||||
- seed: int — for reproducible outputs
|
||||
- logit_bias: Dict[str, float]
|
||||
- user: str
|
||||
- tools: List of tool definitions (passed through to llama.cpp)
|
||||
- tool_choice: str ("auto", "none", or specific tool name)
|
||||
- response_format: Dict — controls output format, e.g.
|
||||
{"type": "json_object"} or
|
||||
{"type": "json_schema", "json_schema": {...}}
|
||||
- stream: bool — NOT supported for encrypted inference; the server
|
||||
will reject this with HTTP 400. Always use stream=False (default).
|
||||
- base_url: str (alternative to initializing with router_url)
|
||||
- security_tier: str ("standard", "high", or "maximum")
|
||||
Controls hardware routing and security level:
|
||||
* "standard": general secure inference
|
||||
* "high": sensitive business data
|
||||
* "maximum": maximum isolation (PHI, classified data)
|
||||
* "standard": general secure inference (GPU)
|
||||
* "high": sensitive business data (balanced CPU/GPU)
|
||||
* "maximum": maximum isolation (PHI, classified data — CPU only)
|
||||
If not specified, server uses default based on model name mapping.
|
||||
|
||||
Returns:
|
||||
|
|
@ -149,7 +158,12 @@ class SecureChatCompletion:
|
|||
"message": {
|
||||
"role": str,
|
||||
"content": str,
|
||||
"tool_calls": List[Dict] # if tools were used
|
||||
"tool_calls": List[Dict], # present if tools were used
|
||||
"reasoning_content": str # present for thinking models
|
||||
# (e.g. Qwen3, DeepSeek-R1);
|
||||
# contains the model's internal
|
||||
# chain-of-thought, separate from
|
||||
# the final answer in "content"
|
||||
},
|
||||
"finish_reason": str
|
||||
}
|
||||
|
|
@ -158,6 +172,23 @@ class SecureChatCompletion:
|
|||
"prompt_tokens": int,
|
||||
"completion_tokens": int,
|
||||
"total_tokens": int
|
||||
},
|
||||
"_metadata": {
|
||||
"payload_id": str, # echoes the X-Payload-ID sent with the request
|
||||
"processed_at": int, # Unix timestamp of server-side processing
|
||||
"is_encrypted": bool, # always True for this endpoint
|
||||
"response_status": str, # "success" on success
|
||||
"security_tier": str, # active tier: "standard", "high", or "maximum"
|
||||
"memory_protection": { # server-side memory protection capabilities
|
||||
"platform": str, # e.g. "linux", "windows", "darwin"
|
||||
"memory_locking": bool, # whether mlock/VirtualLock succeeded
|
||||
"secure_zeroing": bool, # whether memset-based zeroing is available
|
||||
"core_dump_prevention": bool # whether core dumps are suppressed
|
||||
},
|
||||
"cuda_device": { # privacy-safe GPU info (hashed identifiers)
|
||||
"available": bool,
|
||||
"device_hash": str # SHA-256 of device name — not the raw name
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue