docs: docstring updated to reflect server capabilities
This commit is contained in:
parent
c466c49c14
commit
0d88de3bef
1 changed file with 39 additions and 8 deletions
|
|
@ -121,19 +121,28 @@ class SecureChatCompletion:
|
|||
Supported parameters include:
|
||||
- temperature: float (0-2)
|
||||
- max_tokens: int
|
||||
- tools: List of tool definitions
|
||||
- tool_choice: str ("auto", "none", or specific tool name)
|
||||
- top_p: float
|
||||
- stop: Union[str, List[str]]
|
||||
- presence_penalty: float
|
||||
- frequency_penalty: float
|
||||
- presence_penalty: float (-2.0 to 2.0)
|
||||
- frequency_penalty: float (-2.0 to 2.0)
|
||||
- n: int — number of completions to generate
|
||||
- best_of: int — generate this many and return the best
|
||||
- seed: int — for reproducible outputs
|
||||
- logit_bias: Dict[str, float]
|
||||
- user: str
|
||||
- tools: List of tool definitions (passed through to llama.cpp)
|
||||
- tool_choice: str ("auto", "none", or specific tool name)
|
||||
- response_format: Dict — controls output format, e.g.
|
||||
{"type": "json_object"} or
|
||||
{"type": "json_schema", "json_schema": {...}}
|
||||
- stream: bool — NOT supported for encrypted inference; the server
|
||||
will reject this with HTTP 400. Always use stream=False (default).
|
||||
- base_url: str (alternative to initializing with router_url)
|
||||
- security_tier: str ("standard", "high", or "maximum")
|
||||
Controls hardware routing and security level:
|
||||
* "standard": general secure inference
|
||||
* "high": sensitive business data
|
||||
* "maximum": maximum isolation (PHI, classified data)
|
||||
* "standard": general secure inference (GPU)
|
||||
* "high": sensitive business data (balanced CPU/GPU)
|
||||
* "maximum": maximum isolation (PHI, classified data — CPU only)
|
||||
If not specified, server uses default based on model name mapping.
|
||||
|
||||
Returns:
|
||||
|
|
@ -149,7 +158,12 @@ class SecureChatCompletion:
|
|||
"message": {
|
||||
"role": str,
|
||||
"content": str,
|
||||
"tool_calls": List[Dict] # if tools were used
|
||||
"tool_calls": List[Dict], # present if tools were used
|
||||
"reasoning_content": str # present for thinking models
|
||||
# (e.g. Qwen3, DeepSeek-R1);
|
||||
# contains the model's internal
|
||||
# chain-of-thought, separate from
|
||||
# the final answer in "content"
|
||||
},
|
||||
"finish_reason": str
|
||||
}
|
||||
|
|
@ -158,6 +172,23 @@ class SecureChatCompletion:
|
|||
"prompt_tokens": int,
|
||||
"completion_tokens": int,
|
||||
"total_tokens": int
|
||||
},
|
||||
"_metadata": {
|
||||
"payload_id": str, # echoes the X-Payload-ID sent with the request
|
||||
"processed_at": int, # Unix timestamp of server-side processing
|
||||
"is_encrypted": bool, # always True for this endpoint
|
||||
"response_status": str, # "success" on success
|
||||
"security_tier": str, # active tier: "standard", "high", or "maximum"
|
||||
"memory_protection": { # server-side memory protection capabilities
|
||||
"platform": str, # e.g. "linux", "windows", "darwin"
|
||||
"memory_locking": bool, # whether mlock/VirtualLock succeeded
|
||||
"secure_zeroing": bool, # whether memset-based zeroing is available
|
||||
"core_dump_prevention": bool # whether core dumps are suppressed
|
||||
},
|
||||
"cuda_device": { # privacy-safe GPU info (hashed identifiers)
|
||||
"available": bool,
|
||||
"device_hash": str # SHA-256 of device name — not the raw name
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue