docs: docstring updated to reflect server capabilities

This commit is contained in:
Alpha Nerd 2026-04-01 13:11:15 +02:00
parent c466c49c14
commit 0d88de3bef

View file

@ -121,19 +121,28 @@ class SecureChatCompletion:
Supported parameters include:
- temperature: float (0-2)
- max_tokens: int
- tools: List of tool definitions
- tool_choice: str ("auto", "none", or specific tool name)
- top_p: float
- stop: Union[str, List[str]]
- presence_penalty: float
- frequency_penalty: float
- presence_penalty: float (-2.0 to 2.0)
- frequency_penalty: float (-2.0 to 2.0)
- n: int — number of completions to generate
- best_of: int — generate this many and return the best
- seed: int for reproducible outputs
- logit_bias: Dict[str, float]
- user: str
- tools: List of tool definitions (passed through to llama.cpp)
- tool_choice: str ("auto", "none", or specific tool name)
- response_format: Dict controls output format, e.g.
{"type": "json_object"} or
{"type": "json_schema", "json_schema": {...}}
- stream: bool NOT supported for encrypted inference; the server
will reject this with HTTP 400. Always use stream=False (default).
- base_url: str (alternative to initializing with router_url)
- security_tier: str ("standard", "high", or "maximum")
Controls hardware routing and security level:
* "standard": general secure inference
* "high": sensitive business data
* "maximum": maximum isolation (PHI, classified data)
* "standard": general secure inference (GPU)
* "high": sensitive business data (balanced CPU/GPU)
* "maximum": maximum isolation (PHI, classified data CPU only)
If not specified, server uses default based on model name mapping.
Returns:
@ -149,7 +158,12 @@ class SecureChatCompletion:
"message": {
"role": str,
"content": str,
"tool_calls": List[Dict] # if tools were used
"tool_calls": List[Dict], # present if tools were used
"reasoning_content": str # present for thinking models
# (e.g. Qwen3, DeepSeek-R1);
# contains the model's internal
# chain-of-thought, separate from
# the final answer in "content"
},
"finish_reason": str
}
@ -158,6 +172,23 @@ class SecureChatCompletion:
"prompt_tokens": int,
"completion_tokens": int,
"total_tokens": int
},
"_metadata": {
"payload_id": str, # echoes the X-Payload-ID sent with the request
"processed_at": int, # Unix timestamp of server-side processing
"is_encrypted": bool, # always True for this endpoint
"response_status": str, # "success" on success
"security_tier": str, # active tier: "standard", "high", or "maximum"
"memory_protection": { # server-side memory protection capabilities
"platform": str, # e.g. "linux", "windows", "darwin"
"memory_locking": bool, # whether mlock/VirtualLock succeeded
"secure_zeroing": bool, # whether memset-based zeroing is available
"core_dump_prevention": bool # whether core dumps are suppressed
},
"cuda_device": { # privacy-safe GPU info (hashed identifiers)
"available": bool,
"device_hash": str # SHA-256 of device name — not the raw name
}
}
}