Compare commits
No commits in common. "main" and "v0.2.4" have entirely different histories.
15 changed files with 156 additions and 411 deletions
|
|
@ -7,10 +7,16 @@ on:
|
||||||
workflow_dispatch:
|
workflow_dispatch:
|
||||||
|
|
||||||
jobs:
|
jobs:
|
||||||
publish:
|
build:
|
||||||
runs-on: docker-amd64
|
name: Build (${{ matrix.runner }}, py${{ matrix.python }})
|
||||||
|
runs-on: ${{ matrix.runner }}
|
||||||
container:
|
container:
|
||||||
image: python:3.12-bookworm
|
image: python:${{ matrix.python }}-bookworm
|
||||||
|
|
||||||
|
strategy:
|
||||||
|
matrix:
|
||||||
|
python: ["3.10", "3.11", "3.12"]
|
||||||
|
runner: [docker-amd64, docker-arm64]
|
||||||
|
|
||||||
steps:
|
steps:
|
||||||
- name: Checkout repository
|
- name: Checkout repository
|
||||||
|
|
@ -20,13 +26,36 @@ jobs:
|
||||||
.
|
.
|
||||||
|
|
||||||
- name: Install build tools
|
- name: Install build tools
|
||||||
run: pip install build twine
|
run: pip install build Cython
|
||||||
|
|
||||||
- name: Build package
|
- name: Build wheel
|
||||||
run: python -m build
|
run: python -m build --wheel
|
||||||
|
|
||||||
|
- name: Upload wheel artifact
|
||||||
|
uses: actions/upload-artifact@v3
|
||||||
|
with:
|
||||||
|
name: wheel-${{ matrix.runner }}-py${{ matrix.python }}
|
||||||
|
path: dist/*.whl
|
||||||
|
|
||||||
|
publish:
|
||||||
|
name: Publish to PyPI
|
||||||
|
needs: build
|
||||||
|
runs-on: docker-amd64
|
||||||
|
container:
|
||||||
|
image: python:3.12-bookworm
|
||||||
|
|
||||||
|
steps:
|
||||||
|
- name: Download all wheels
|
||||||
|
uses: actions/download-artifact@v3
|
||||||
|
with:
|
||||||
|
path: dist/
|
||||||
|
merge-multiple: true
|
||||||
|
|
||||||
|
- name: Install twine
|
||||||
|
run: pip install twine
|
||||||
|
|
||||||
- name: Publish to PyPI
|
- name: Publish to PyPI
|
||||||
env:
|
env:
|
||||||
TWINE_USERNAME: __token__
|
TWINE_USERNAME: __token__
|
||||||
TWINE_PASSWORD: ${{ secrets.PYPI_API_TOKEN }}
|
TWINE_PASSWORD: ${{ secrets.PYPI_API_TOKEN }}
|
||||||
run: twine upload dist/*
|
run: twine upload dist/**/*.whl
|
||||||
|
|
|
||||||
17
README.md
17
README.md
|
|
@ -10,17 +10,6 @@
|
||||||
|
|
||||||
## 🚀 Quick Start
|
## 🚀 Quick Start
|
||||||
|
|
||||||
### 0. Try It Now (Demo Credentials)
|
|
||||||
|
|
||||||
No account needed — use these public demo credentials to test immediately:
|
|
||||||
|
|
||||||
| | |
|
|
||||||
|---|---|
|
|
||||||
| **API key** | `NOMYO_AI_E2EE_INFERENCE` |
|
|
||||||
| **Model** | `Qwen/Qwen3-0.6B` |
|
|
||||||
|
|
||||||
> **Note:** The demo endpoint uses a fixed 256-token context window and is intended for evaluation only.
|
|
||||||
|
|
||||||
### 1. Install methods
|
### 1. Install methods
|
||||||
|
|
||||||
via pip (recommended):
|
via pip (recommended):
|
||||||
|
|
@ -360,8 +349,7 @@ SecureChatCompletion(
|
||||||
base_url: str = "https://api.nomyo.ai",
|
base_url: str = "https://api.nomyo.ai",
|
||||||
allow_http: bool = False,
|
allow_http: bool = False,
|
||||||
api_key: Optional[str] = None,
|
api_key: Optional[str] = None,
|
||||||
secure_memory: bool = True,
|
secure_memory: bool = True
|
||||||
max_retries: int = 2
|
|
||||||
)
|
)
|
||||||
```
|
```
|
||||||
|
|
||||||
|
|
@ -371,7 +359,6 @@ SecureChatCompletion(
|
||||||
- `allow_http`: Allow HTTP connections (ONLY for local development, never in production)
|
- `allow_http`: Allow HTTP connections (ONLY for local development, never in production)
|
||||||
- `api_key`: Optional API key for bearer authentication
|
- `api_key`: Optional API key for bearer authentication
|
||||||
- `secure_memory`: Enable secure memory protection (default: True)
|
- `secure_memory`: Enable secure memory protection (default: True)
|
||||||
- `max_retries`: Retries on retryable errors (429, 500, 502, 503, 504, network errors) with exponential backoff. Default: 2
|
|
||||||
|
|
||||||
#### Methods
|
#### Methods
|
||||||
|
|
||||||
|
|
@ -383,7 +370,7 @@ SecureChatCompletion(
|
||||||
#### Constructor
|
#### Constructor
|
||||||
|
|
||||||
```python
|
```python
|
||||||
SecureCompletionClient(router_url: str = "https://api.nomyo.ai", allow_http: bool = False, max_retries: int = 2)
|
SecureCompletionClient(router_url: str = "https://api.nomyo.ai")
|
||||||
```
|
```
|
||||||
|
|
||||||
#### Methods
|
#### Methods
|
||||||
|
|
|
||||||
|
|
@ -3,8 +3,6 @@
|
||||||
This documentation provides comprehensive information about using the NOMYO Secure Python Chat Client, a drop-in replacement for OpenAI's ChatCompletion API with end-to-end (E2E) encryption.
|
This documentation provides comprehensive information about using the NOMYO Secure Python Chat Client, a drop-in replacement for OpenAI's ChatCompletion API with end-to-end (E2E) encryption.
|
||||||
To use this client library you need a paid subscription on [NOMYO Inference](https://chat.nomyo.ai/).
|
To use this client library you need a paid subscription on [NOMYO Inference](https://chat.nomyo.ai/).
|
||||||
|
|
||||||

|
|
||||||
|
|
||||||
## Overview
|
## Overview
|
||||||
|
|
||||||
The NOMYO Secure Client provides:
|
The NOMYO Secure Client provides:
|
||||||
|
|
@ -46,11 +44,9 @@ asyncio.run(main())
|
||||||
1. [Installation](installation.md) - How to install and set up the client
|
1. [Installation](installation.md) - How to install and set up the client
|
||||||
2. [Getting Started](getting-started.md) - Quick start guide with examples
|
2. [Getting Started](getting-started.md) - Quick start guide with examples
|
||||||
3. [API Reference](api-reference.md) - Complete API documentation
|
3. [API Reference](api-reference.md) - Complete API documentation
|
||||||
4. [Models](models.md) - Available models and selection guide
|
4. [Security Guide](security-guide.md) - Security features and best practices
|
||||||
5. [Security Guide](security-guide.md) - Security features and best practices
|
5. [Examples](examples.md) - Advanced usage scenarios
|
||||||
6. [Examples](examples.md) - Advanced usage scenarios
|
6. [Troubleshooting](troubleshooting.md) - Common issues and solutions
|
||||||
7. [Rate Limits](rate-limits.md) - Request limits, burst allowance, and error handling
|
|
||||||
8. [Troubleshooting](troubleshooting.md) - Common issues and solutions
|
|
||||||
|
|
||||||
## Key Features
|
## Key Features
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -11,8 +11,7 @@ SecureChatCompletion(
|
||||||
base_url: str = "https://api.nomyo.ai",
|
base_url: str = "https://api.nomyo.ai",
|
||||||
allow_http: bool = False,
|
allow_http: bool = False,
|
||||||
api_key: Optional[str] = None,
|
api_key: Optional[str] = None,
|
||||||
secure_memory: bool = True,
|
secure_memory: bool = True
|
||||||
max_retries: int = 2
|
|
||||||
)
|
)
|
||||||
```
|
```
|
||||||
|
|
||||||
|
|
@ -22,7 +21,6 @@ SecureChatCompletion(
|
||||||
- `allow_http` (bool): Allow HTTP connections (ONLY for local development, never in production)
|
- `allow_http` (bool): Allow HTTP connections (ONLY for local development, never in production)
|
||||||
- `api_key` (Optional[str]): Optional API key for bearer authentication
|
- `api_key` (Optional[str]): Optional API key for bearer authentication
|
||||||
- `secure_memory` (bool): Enable secure memory protection (default: True)
|
- `secure_memory` (bool): Enable secure memory protection (default: True)
|
||||||
- `max_retries` (int): Number of retries on retryable errors (429, 500, 502, 503, 504, network errors). Uses exponential backoff. Default: 2
|
|
||||||
|
|
||||||
### Methods
|
### Methods
|
||||||
|
|
||||||
|
|
@ -75,30 +73,10 @@ A dictionary containing the chat completion response with the following structur
|
||||||
"prompt_tokens": int,
|
"prompt_tokens": int,
|
||||||
"completion_tokens": int,
|
"completion_tokens": int,
|
||||||
"total_tokens": int
|
"total_tokens": int
|
||||||
},
|
|
||||||
"_metadata": {
|
|
||||||
"payload_id": str,
|
|
||||||
"processed_at": int, # Unix timestamp
|
|
||||||
"is_encrypted": bool,
|
|
||||||
"response_status": str,
|
|
||||||
"security_tier": str, # "standard", "high", or "maximum"
|
|
||||||
"memory_protection": dict, # server-side memory protection info
|
|
||||||
"cuda_device": dict, # privacy-safe GPU info (hashed identifiers)
|
|
||||||
"tpm_attestation": { # TPM 2.0 hardware attestation (see Security Guide)
|
|
||||||
"is_available": bool,
|
|
||||||
# Present only when is_available is True:
|
|
||||||
"pcr_banks": str, # e.g. "sha256:0,7,10"
|
|
||||||
"pcr_values": dict, # {bank: {pcr_index: hex_digest}}
|
|
||||||
"quote_b64": str, # base64-encoded TPMS_ATTEST (signed by AIK)
|
|
||||||
"signature_b64": str, # base64-encoded TPMT_SIGNATURE
|
|
||||||
"aik_pubkey_b64": str, # base64-encoded TPM2B_PUBLIC (ephemeral AIK)
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
```
|
```
|
||||||
|
|
||||||
The `_metadata` field is added by the client library and is not part of the OpenAI API response format. See the [Security Guide](security-guide.md) for how to interpret and verify `tpm_attestation`.
|
|
||||||
|
|
||||||
#### acreate(model, messages, **kwargs)
|
#### acreate(model, messages, **kwargs)
|
||||||
|
|
||||||
Async alias for create() method.
|
Async alias for create() method.
|
||||||
|
|
@ -114,18 +92,13 @@ The `SecureCompletionClient` class handles the underlying encryption, key manage
|
||||||
### Constructor
|
### Constructor
|
||||||
|
|
||||||
```python
|
```python
|
||||||
SecureCompletionClient(
|
SecureCompletionClient(router_url: str = "https://api.nomyo.ai", allow_http: bool = False)
|
||||||
router_url: str = "https://api.nomyo.ai",
|
|
||||||
allow_http: bool = False,
|
|
||||||
max_retries: int = 2
|
|
||||||
)
|
|
||||||
```
|
```
|
||||||
|
|
||||||
**Parameters:**
|
**Parameters:**
|
||||||
|
|
||||||
- `router_url` (str): Base URL of the NOMYO Router (must use HTTPS for production)
|
- `router_url` (str): Base URL of the NOMYO Router (must use HTTPS for production)
|
||||||
- `allow_http` (bool): Allow HTTP connections (ONLY for local development, never in production)
|
- `allow_http` (bool): Allow HTTP connections (ONLY for local development, never in production)
|
||||||
- `max_retries` (int): Number of retries on retryable errors (429, 500, 502, 503, 504, network errors). Uses exponential backoff. Default: 2
|
|
||||||
|
|
||||||
### Methods
|
### Methods
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -1,33 +1,5 @@
|
||||||
# Getting Started
|
# Getting Started
|
||||||
|
|
||||||
## Try It Now (Demo Credentials)
|
|
||||||
|
|
||||||
You can test the client immediately using these public demo credentials — no sign-up required:
|
|
||||||
|
|
||||||
| | |
|
|
||||||
|---|---|
|
|
||||||
| **API key** | `NOMYO_AI_E2EE_INFERENCE` |
|
|
||||||
| **Model** | `Qwen/Qwen3-0.6B` |
|
|
||||||
|
|
||||||
> **Note:** The demo endpoint uses a fixed 256-token context window and is intended for evaluation only.
|
|
||||||
|
|
||||||
```python
|
|
||||||
import asyncio
|
|
||||||
from nomyo import SecureChatCompletion
|
|
||||||
|
|
||||||
async def main():
|
|
||||||
client = SecureChatCompletion(api_key="NOMYO_AI_E2EE_INFERENCE")
|
|
||||||
|
|
||||||
response = await client.create(
|
|
||||||
model="Qwen/Qwen3-0.6B",
|
|
||||||
messages=[{"role": "user", "content": "Hello!"}]
|
|
||||||
)
|
|
||||||
|
|
||||||
print(response['choices'][0]['message']['content'])
|
|
||||||
|
|
||||||
asyncio.run(main())
|
|
||||||
```
|
|
||||||
|
|
||||||
## Basic Usage
|
## Basic Usage
|
||||||
|
|
||||||
The NOMYO client provides end-to-end encryption (E2E) for all communications between your application and the NOMYO inference endpoints. This ensures that your prompts and responses are protected from unauthorized access or interception.
|
The NOMYO client provides end-to-end encryption (E2E) for all communications between your application and the NOMYO inference endpoints. This ensures that your prompts and responses are protected from unauthorized access or interception.
|
||||||
|
|
|
||||||
|
|
@ -1,48 +0,0 @@
|
||||||
# Available Models
|
|
||||||
|
|
||||||
All models are available via `api.nomyo.ai`. Pass the model ID string directly to the `model` parameter of `create()`.
|
|
||||||
|
|
||||||
## Model List
|
|
||||||
|
|
||||||
| Model ID | Parameters | Type | Notes |
|
|
||||||
|---|---|---|---|
|
|
||||||
| `Qwen/Qwen3-0.6B` | 0.6B | General | Lightweight, fast inference |
|
|
||||||
| `Qwen/Qwen3.5-0.8B` | 0.8B | General | Lightweight, fast inference |
|
|
||||||
| `LiquidAI/LFM2.5-1.2B-Thinking` | 1.2B | Thinking | Reasoning model |
|
|
||||||
| `ibm-granite/granite-4.0-h-small` | Small | General | IBM Granite 4.0, enterprise-focused |
|
|
||||||
| `Qwen/Qwen3.5-9B` | 9B | General | Balanced quality and speed |
|
|
||||||
| `utter-project/EuroLLM-9B-Instruct-2512` | 9B | General | Multilingual, strong European language support |
|
|
||||||
| `zai-org/GLM-4.7-Flash` | — | General | Fast GLM variant |
|
|
||||||
| `mistralai/Ministral-3-14B-Instruct-2512-GGUF` | 14B | General | Mistral instruction-tuned |
|
|
||||||
| `ServiceNow-AI/Apriel-1.6-15b-Thinker` | 15B | Thinking | Reasoning model |
|
|
||||||
| `openai/gpt-oss-20b` | 20B | General | OpenAI open-weight release |
|
|
||||||
| `LiquidAI/LFM2-24B-A2B` | 24B (2B active) | General | MoE — efficient inference |
|
|
||||||
| `Qwen/Qwen3.5-27B` | 27B | General | High quality, large context |
|
|
||||||
| `google/medgemma-27b-it` | 27B | Specialized | Medical domain, instruction-tuned |
|
|
||||||
| `nvidia/NVIDIA-Nemotron-3-Nano-30B-A3B-NVFP4` | 30B (3B active) | General | MoE — efficient inference |
|
|
||||||
| `Qwen/Qwen3.5-35B-A3B` | 35B (3B active) | General | MoE — efficient inference |
|
|
||||||
| `moonshotai/Kimi-Linear-48B-A3B-Instruct` | 48B (3B active) | General | MoE — large capacity, efficient inference |
|
|
||||||
|
|
||||||
> **MoE** (Mixture of Experts) models show total/active parameter counts. Only active parameters are used per token, keeping inference cost low relative to total model size.
|
|
||||||
|
|
||||||
## Usage Example
|
|
||||||
|
|
||||||
```python
|
|
||||||
from nomyo import SecureChatCompletion
|
|
||||||
|
|
||||||
client = SecureChatCompletion(api_key="your-api-key")
|
|
||||||
|
|
||||||
response = await client.create(
|
|
||||||
model="Qwen/Qwen3.5-9B",
|
|
||||||
messages=[{"role": "user", "content": "Hello!"}]
|
|
||||||
)
|
|
||||||
```
|
|
||||||
|
|
||||||
## Choosing a Model
|
|
||||||
|
|
||||||
- **Low latency / edge use**: `Qwen/Qwen3-0.6B`, `Qwen/Qwen3.5-0.8B`, `LiquidAI/LFM2.5-1.2B-Thinking`
|
|
||||||
- **Balanced quality and speed**: `Qwen/Qwen3.5-9B`, `mistralai/Ministral-3-14B-Instruct-2512-GGUF`
|
|
||||||
- **Reasoning / chain-of-thought**: `LiquidAI/LFM2.5-1.2B-Thinking`, `ServiceNow-AI/Apriel-1.6-15b-Thinker`
|
|
||||||
- **Multilingual**: `utter-project/EuroLLM-9B-Instruct-2512`
|
|
||||||
- **Medical**: `google/medgemma-27b-it`
|
|
||||||
- **Highest quality**: `moonshotai/Kimi-Linear-48B-A3B-Instruct`, `Qwen/Qwen3.5-35B-A3B`
|
|
||||||
|
|
@ -1,61 +0,0 @@
|
||||||
# Rate Limits
|
|
||||||
|
|
||||||
The NOMYO API (`api.nomyo.ai`) enforces rate limits to ensure fair usage and service stability for all users.
|
|
||||||
|
|
||||||
## Default Rate Limit
|
|
||||||
|
|
||||||
By default, each API key is limited to **2 requests per second**.
|
|
||||||
|
|
||||||
## Burst Allowance
|
|
||||||
|
|
||||||
Short bursts above the default limit are permitted. You may send up to **4 requests per second** in burst mode, provided you have not exceeded burst usage within the current **10-second window**.
|
|
||||||
|
|
||||||
Burst capacity is granted once per 10-second window. If you consume the burst allowance, you must wait for the window to reset before burst is available again.
|
|
||||||
|
|
||||||
## Rate Limit Summary
|
|
||||||
|
|
||||||
| Mode | Limit | Condition |
|
|
||||||
|---------|--------------------|----------------------------------|
|
|
||||||
| Default | 2 requests/second | Always active |
|
|
||||||
| Burst | 4 requests/second | Once per 10-second window |
|
|
||||||
|
|
||||||
## Error Responses
|
|
||||||
|
|
||||||
### 429 Too Many Requests
|
|
||||||
|
|
||||||
Returned when your request rate exceeds the allowed limit.
|
|
||||||
|
|
||||||
```
|
|
||||||
HTTP/1.1 429 Too Many Requests
|
|
||||||
```
|
|
||||||
|
|
||||||
**What to do:** Back off and retry after a short delay. Implement exponential backoff in your client to avoid repeated limit hits.
|
|
||||||
|
|
||||||
### 503 Service Unavailable (Cool-down)
|
|
||||||
|
|
||||||
Returned when burst limits are abused repeatedly. A **30-minute cool-down** is applied to the offending API key.
|
|
||||||
|
|
||||||
```
|
|
||||||
HTTP/1.1 503 Service Unavailable
|
|
||||||
```
|
|
||||||
|
|
||||||
**What to do:** Wait 30 minutes before retrying. Review your request patterns to ensure you stay within the permitted limits.
|
|
||||||
|
|
||||||
## Best Practices
|
|
||||||
|
|
||||||
- **Throttle your requests** client-side to stay at or below 2 requests/second under normal load.
|
|
||||||
- **Use burst sparingly** — it is intended for occasional spikes, not sustained high-throughput usage.
|
|
||||||
- **Implement exponential backoff** when you receive a `429` response. Start with a short delay (e.g. 500 ms) and double it on each subsequent failure, up to a reasonable maximum.
|
|
||||||
- **Monitor for `503` responses** — repeated occurrences indicate that your usage pattern is triggering the abuse threshold. Refactor your request logic before the cool-down expires.
|
|
||||||
|
|
||||||
## Retry Behaviour
|
|
||||||
|
|
||||||
The client retries automatically on `429`, `500`, `502`, `503`, `504`, and network errors using exponential backoff (1 s, 2 s, …). The default is **2 retries**. You can raise or disable this per client:
|
|
||||||
|
|
||||||
```python
|
|
||||||
# More retries for high-throughput workloads
|
|
||||||
client = SecureChatCompletion(api_key="...", max_retries=5)
|
|
||||||
|
|
||||||
# Disable retries entirely
|
|
||||||
client = SecureChatCompletion(api_key="...", max_retries=0)
|
|
||||||
```
|
|
||||||
Binary file not shown.
|
Before Width: | Height: | Size: 182 KiB |
|
|
@ -162,81 +162,6 @@ Secure memory features:
|
||||||
- Guarantees zeroing of sensitive memory
|
- Guarantees zeroing of sensitive memory
|
||||||
- Prevents memory dumps from containing sensitive data
|
- Prevents memory dumps from containing sensitive data
|
||||||
|
|
||||||
## Hardware Attestation (TPM 2.0)
|
|
||||||
|
|
||||||
### What it is
|
|
||||||
|
|
||||||
When the server has a TPM 2.0 chip, every response includes a `tpm_attestation` block in `_metadata`. This is a cryptographically signed hardware quote proving:
|
|
||||||
|
|
||||||
- Which firmware and Secure Boot state the server is running (PCR 0, 7)
|
|
||||||
- Which application binary is running, when IMA is active (PCR 10)
|
|
||||||
|
|
||||||
The quote is signed by an ephemeral AIK (Attestation Identity Key) generated fresh for each request and tied to the `payload_id` nonce, so it cannot be replayed for a different request.
|
|
||||||
|
|
||||||
### Reading the attestation
|
|
||||||
|
|
||||||
```python
|
|
||||||
response = await client.create(
|
|
||||||
model="Qwen/Qwen3-0.6B",
|
|
||||||
messages=[{"role": "user", "content": "..."}],
|
|
||||||
security_tier="maximum"
|
|
||||||
)
|
|
||||||
|
|
||||||
tpm = response["_metadata"].get("tpm_attestation", {})
|
|
||||||
|
|
||||||
if tpm.get("is_available"):
|
|
||||||
print("PCR banks:", tpm["pcr_banks"]) # e.g. "sha256:0,7,10"
|
|
||||||
print("PCR values:", tpm["pcr_values"]) # {bank: {index: hex}}
|
|
||||||
print("AIK key:", tpm["aik_pubkey_b64"][:32], "...")
|
|
||||||
else:
|
|
||||||
print("TPM not available on this server")
|
|
||||||
```
|
|
||||||
|
|
||||||
### Verifying the quote
|
|
||||||
|
|
||||||
The response is self-contained: `aik_pubkey_b64` is the full public key of the AIK that signed the quote, so no separate key-fetch round-trip is needed.
|
|
||||||
|
|
||||||
Verification steps using `tpm2-pytss`:
|
|
||||||
|
|
||||||
```python
|
|
||||||
import base64
|
|
||||||
from tpm2_pytss.types import TPM2B_PUBLIC, TPMT_SIGNATURE, TPM2B_ATTEST
|
|
||||||
|
|
||||||
# 1. Decode the quote components
|
|
||||||
aik_pub = TPM2B_PUBLIC.unmarshal(base64.b64decode(tpm["aik_pubkey_b64"]))[0]
|
|
||||||
quote = TPM2B_ATTEST.unmarshal(base64.b64decode(tpm["quote_b64"]))[0]
|
|
||||||
sig = TPMT_SIGNATURE.unmarshal(base64.b64decode(tpm["signature_b64"]))[0]
|
|
||||||
|
|
||||||
# 2. Verify the signature over the quote using the AIK public key
|
|
||||||
# (use a TPM ESAPI verify_signature call or an offline RSA verify)
|
|
||||||
|
|
||||||
# 3. Inspect the qualifying_data inside the quote — it must match
|
|
||||||
# SHA-256(payload_id.encode())[:16] to confirm this quote is for this request
|
|
||||||
|
|
||||||
# 4. Check pcr_values against your known-good baseline
|
|
||||||
```
|
|
||||||
|
|
||||||
> Full verification requires `tpm2-pytss` on the client side (`pip install tpm2-pytss` + `sudo apt install libtss2-dev`). It is optional — the attestation is informational unless your deployment policy requires verification.
|
|
||||||
|
|
||||||
### Behaviour per security tier
|
|
||||||
|
|
||||||
| Tier | TPM unavailable |
|
|
||||||
|------|----------------|
|
|
||||||
| `standard` | `tpm_attestation: {"is_available": false}` — request proceeds |
|
|
||||||
| `high` | same as standard |
|
|
||||||
| `maximum` | `ServiceUnavailableError` (HTTP 503) — request rejected |
|
|
||||||
|
|
||||||
For `maximum` tier, the server enforces TPM availability as a hard requirement. If your server has no TPM and you request `maximum`, catch the error explicitly:
|
|
||||||
|
|
||||||
```python
|
|
||||||
from nomyo import ServiceUnavailableError
|
|
||||||
|
|
||||||
try:
|
|
||||||
response = await client.create(..., security_tier="maximum")
|
|
||||||
except ServiceUnavailableError as e:
|
|
||||||
print("Server does not meet TPM requirements for maximum tier:", e)
|
|
||||||
```
|
|
||||||
|
|
||||||
## Compliance Considerations
|
## Compliance Considerations
|
||||||
|
|
||||||
### HIPAA Compliance
|
### HIPAA Compliance
|
||||||
|
|
@ -282,11 +207,9 @@ response = await client.create(
|
||||||
messages=[{"role": "user", "content": "Hello"}]
|
messages=[{"role": "user", "content": "Hello"}]
|
||||||
)
|
)
|
||||||
|
|
||||||
print(response["_metadata"]) # Contains security_tier, memory_protection, tpm_attestation, etc.
|
print(response["_metadata"]) # Contains security-related information
|
||||||
```
|
```
|
||||||
|
|
||||||
See [Hardware Attestation](#hardware-attestation-tpm-20) for details on the `tpm_attestation` field.
|
|
||||||
|
|
||||||
### Logging
|
### Logging
|
||||||
|
|
||||||
Enable logging to see security operations:
|
Enable logging to see security operations:
|
||||||
|
|
|
||||||
|
|
@ -1 +0,0 @@
|
||||||
# Troubleshooting
|
|
||||||
|
|
@ -1,5 +1,5 @@
|
||||||
import asyncio, ctypes, json, base64, urllib.parse, httpx, os, secrets, sys, warnings, logging
|
import json, base64, urllib.parse, httpx, os, secrets, warnings, logging
|
||||||
from typing import Dict, Any, Optional, Union
|
from typing import Dict, Any, Optional
|
||||||
from cryptography.hazmat.primitives import serialization, hashes
|
from cryptography.hazmat.primitives import serialization, hashes
|
||||||
from cryptography.hazmat.primitives.asymmetric import rsa, padding
|
from cryptography.hazmat.primitives.asymmetric import rsa, padding
|
||||||
from cryptography.hazmat.backends import default_backend
|
from cryptography.hazmat.backends import default_backend
|
||||||
|
|
@ -76,7 +76,7 @@ class SecureCompletionClient:
|
||||||
- Response parsing
|
- Response parsing
|
||||||
"""
|
"""
|
||||||
|
|
||||||
def __init__(self, router_url: str = "https://api.nomyo.ai", allow_http: bool = False, secure_memory: bool = True, max_retries: int = 2):
|
def __init__(self, router_url: str = "https://api.nomyo.ai", allow_http: bool = False, secure_memory: bool = True):
|
||||||
"""
|
"""
|
||||||
Initialize the secure completion client.
|
Initialize the secure completion client.
|
||||||
|
|
||||||
|
|
@ -84,9 +84,6 @@ class SecureCompletionClient:
|
||||||
router_url: Base URL of the NOMYO Router (must use HTTPS for production)
|
router_url: Base URL of the NOMYO Router (must use HTTPS for production)
|
||||||
allow_http: Allow HTTP connections (ONLY for local development, never in production)
|
allow_http: Allow HTTP connections (ONLY for local development, never in production)
|
||||||
secure_memory: Whether to use secure memory operations for this instance.
|
secure_memory: Whether to use secure memory operations for this instance.
|
||||||
max_retries: Number of retries on retryable errors (429, 500, 502, 503, 504,
|
|
||||||
network errors). Uses exponential backoff. Default 2, matching
|
|
||||||
the OpenAI Python SDK default.
|
|
||||||
"""
|
"""
|
||||||
self.router_url = router_url.rstrip('/')
|
self.router_url = router_url.rstrip('/')
|
||||||
self.private_key = None
|
self.private_key = None
|
||||||
|
|
@ -94,7 +91,6 @@ class SecureCompletionClient:
|
||||||
self.key_size = 4096 # RSA key size
|
self.key_size = 4096 # RSA key size
|
||||||
self.allow_http = allow_http # Store for use in fetch_server_public_key
|
self.allow_http = allow_http # Store for use in fetch_server_public_key
|
||||||
self._use_secure_memory = _SECURE_MEMORY_AVAILABLE and secure_memory
|
self._use_secure_memory = _SECURE_MEMORY_AVAILABLE and secure_memory
|
||||||
self.max_retries = max_retries
|
|
||||||
|
|
||||||
# Validate HTTPS for security
|
# Validate HTTPS for security
|
||||||
if not self.router_url.startswith("https://"):
|
if not self.router_url.startswith("https://"):
|
||||||
|
|
@ -337,11 +333,10 @@ class SecureCompletionClient:
|
||||||
except Exception:
|
except Exception:
|
||||||
raise ValueError("Failed to fetch server's public key")
|
raise ValueError("Failed to fetch server's public key")
|
||||||
|
|
||||||
async def _do_encrypt(self, payload_bytes: Union[bytes, bytearray], aes_key: Union[bytes, bytearray]) -> bytes:
|
async def _do_encrypt(self, payload_bytes: bytes, aes_key: bytes) -> bytes:
|
||||||
"""
|
"""
|
||||||
Core AES-256-GCM + RSA-OAEP encryption. Caller is responsible for
|
Core AES-256-GCM + RSA-OAEP encryption. Caller is responsible for
|
||||||
memory protection of payload_bytes and aes_key before calling this.
|
memory protection of payload_bytes and aes_key before calling this.
|
||||||
Accepts bytearray to avoid creating an unzeroed immutable bytes copy.
|
|
||||||
"""
|
"""
|
||||||
nonce = secrets.token_bytes(12) # 96-bit nonce for GCM
|
nonce = secrets.token_bytes(12) # 96-bit nonce for GCM
|
||||||
cipher = Cipher(
|
cipher = Cipher(
|
||||||
|
|
@ -358,28 +353,14 @@ class SecureCompletionClient:
|
||||||
server_public_key_pem.encode('utf-8'),
|
server_public_key_pem.encode('utf-8'),
|
||||||
backend=default_backend()
|
backend=default_backend()
|
||||||
)
|
)
|
||||||
# RSA encrypt requires bytes — an immutable copy is unavoidable here.
|
encrypted_aes_key = server_public_key.encrypt(
|
||||||
# We narrow its lifetime to this block and attempt to zero it via
|
aes_key,
|
||||||
# CPython internals immediately after use. This relies on the CPython
|
padding.OAEP(
|
||||||
# bytes object layout (ob_sval starts at getsizeof(b'')-1 from id()),
|
mgf=padding.MGF1(algorithm=hashes.SHA256()),
|
||||||
# so it is a best-effort measure on CPython only.
|
algorithm=hashes.SHA256(),
|
||||||
_key_bytes = bytes(aes_key)
|
label=None
|
||||||
try:
|
|
||||||
encrypted_aes_key = server_public_key.encrypt(
|
|
||||||
_key_bytes,
|
|
||||||
padding.OAEP(
|
|
||||||
mgf=padding.MGF1(algorithm=hashes.SHA256()),
|
|
||||||
algorithm=hashes.SHA256(),
|
|
||||||
label=None
|
|
||||||
)
|
|
||||||
)
|
)
|
||||||
finally:
|
)
|
||||||
try:
|
|
||||||
_data_offset = sys.getsizeof(b'') - 1 # offset to ob_sval in PyBytesObject
|
|
||||||
ctypes.memset(id(_key_bytes) + _data_offset, 0, len(_key_bytes))
|
|
||||||
except Exception:
|
|
||||||
pass
|
|
||||||
del _key_bytes
|
|
||||||
|
|
||||||
encrypted_package = {
|
encrypted_package = {
|
||||||
"version": "1.0",
|
"version": "1.0",
|
||||||
|
|
@ -424,8 +405,8 @@ class SecureCompletionClient:
|
||||||
raise ValueError("Payload cannot be empty")
|
raise ValueError("Payload cannot be empty")
|
||||||
|
|
||||||
try:
|
try:
|
||||||
# Serialize payload to JSON as bytearray so SecureBuffer can zero the original
|
# Serialize payload to JSON
|
||||||
payload_json = bytearray(json.dumps(payload).encode('utf-8'))
|
payload_json = json.dumps(payload).encode('utf-8')
|
||||||
|
|
||||||
# Validate payload size (prevent DoS)
|
# Validate payload size (prevent DoS)
|
||||||
MAX_PAYLOAD_SIZE = 10 * 1024 * 1024 # 10MB limit
|
MAX_PAYLOAD_SIZE = 10 * 1024 * 1024 # 10MB limit
|
||||||
|
|
@ -434,14 +415,14 @@ class SecureCompletionClient:
|
||||||
|
|
||||||
logger.debug("Payload size: %d bytes", len(payload_json))
|
logger.debug("Payload size: %d bytes", len(payload_json))
|
||||||
|
|
||||||
aes_key = bytearray(secrets.token_bytes(32)) # 256-bit key as bytearray
|
aes_key = secrets.token_bytes(32) # 256-bit key
|
||||||
try:
|
try:
|
||||||
if self._use_secure_memory:
|
if self._use_secure_memory:
|
||||||
with secure_bytearray(payload_json) as protected_payload:
|
with secure_bytearray(payload_json) as protected_payload:
|
||||||
with secure_bytearray(aes_key) as protected_aes_key:
|
with secure_bytearray(aes_key) as protected_aes_key:
|
||||||
return await self._do_encrypt(
|
return await self._do_encrypt(
|
||||||
protected_payload.data,
|
bytes(protected_payload.data),
|
||||||
protected_aes_key.data
|
bytes(protected_aes_key.data)
|
||||||
)
|
)
|
||||||
else:
|
else:
|
||||||
logger.warning("Secure memory not available, using standard encryption")
|
logger.warning("Secure memory not available, using standard encryption")
|
||||||
|
|
@ -495,20 +476,6 @@ class SecureCompletionClient:
|
||||||
if missing_fields:
|
if missing_fields:
|
||||||
raise ValueError(f"Missing required fields in encrypted package: {', '.join(missing_fields)}")
|
raise ValueError(f"Missing required fields in encrypted package: {', '.join(missing_fields)}")
|
||||||
|
|
||||||
# Validate version and algorithm to prevent downgrade attacks
|
|
||||||
SUPPORTED_VERSION = "1.0"
|
|
||||||
SUPPORTED_ALGORITHM = "hybrid-aes256-rsa4096"
|
|
||||||
if package["version"] != SUPPORTED_VERSION:
|
|
||||||
raise ValueError(
|
|
||||||
f"Unsupported protocol version: '{package['version']}'. "
|
|
||||||
f"Expected: '{SUPPORTED_VERSION}'"
|
|
||||||
)
|
|
||||||
if package["algorithm"] != SUPPORTED_ALGORITHM:
|
|
||||||
raise ValueError(
|
|
||||||
f"Unsupported encryption algorithm: '{package['algorithm']}'. "
|
|
||||||
f"Expected: '{SUPPORTED_ALGORITHM}'"
|
|
||||||
)
|
|
||||||
|
|
||||||
# Validate encrypted_payload structure
|
# Validate encrypted_payload structure
|
||||||
if not isinstance(package["encrypted_payload"], dict):
|
if not isinstance(package["encrypted_payload"], dict):
|
||||||
raise ValueError("Invalid encrypted_payload: must be a dictionary")
|
raise ValueError("Invalid encrypted_payload: must be a dictionary")
|
||||||
|
|
@ -518,13 +485,9 @@ class SecureCompletionClient:
|
||||||
if missing_payload_fields:
|
if missing_payload_fields:
|
||||||
raise ValueError(f"Missing fields in encrypted_payload: {', '.join(missing_payload_fields)}")
|
raise ValueError(f"Missing fields in encrypted_payload: {', '.join(missing_payload_fields)}")
|
||||||
|
|
||||||
# Guard: private key must be initialized before attempting decryption
|
|
||||||
if self.private_key is None:
|
|
||||||
raise SecurityError("Private key not initialized. Call generate_keys() or load_keys() first.")
|
|
||||||
|
|
||||||
# Decrypt with proper error handling — keep crypto errors opaque (timing attacks)
|
# Decrypt with proper error handling — keep crypto errors opaque (timing attacks)
|
||||||
|
plaintext_json: Optional[str] = None
|
||||||
plaintext_size: int = 0
|
plaintext_size: int = 0
|
||||||
response: Optional[Dict[str, Any]] = None
|
|
||||||
try:
|
try:
|
||||||
# Decrypt AES key with private key
|
# Decrypt AES key with private key
|
||||||
encrypted_aes_key = base64.b64decode(package["encrypted_aes_key"])
|
encrypted_aes_key = base64.b64decode(package["encrypted_aes_key"])
|
||||||
|
|
@ -545,7 +508,7 @@ class SecureCompletionClient:
|
||||||
tag = base64.b64decode(package["encrypted_payload"]["tag"])
|
tag = base64.b64decode(package["encrypted_payload"]["tag"])
|
||||||
|
|
||||||
cipher = Cipher(
|
cipher = Cipher(
|
||||||
algorithms.AES(protected_aes_key.data),
|
algorithms.AES(bytes(protected_aes_key.data)),
|
||||||
modes.GCM(nonce, tag),
|
modes.GCM(nonce, tag),
|
||||||
backend=default_backend()
|
backend=default_backend()
|
||||||
)
|
)
|
||||||
|
|
@ -554,14 +517,12 @@ class SecureCompletionClient:
|
||||||
plaintext_size = len(plaintext_bytes)
|
plaintext_size = len(plaintext_bytes)
|
||||||
|
|
||||||
with secure_bytearray(plaintext_bytes) as protected_plaintext:
|
with secure_bytearray(plaintext_bytes) as protected_plaintext:
|
||||||
# Parse directly from bytearray — json.loads accepts bytearray
|
# NOTE: plaintext_json is a Python str (immutable) and cannot be
|
||||||
# (Python 3.6+), avoiding an immutable bytes/str copy that cannot
|
# securely zeroed. The bytearray source is zeroed by the context
|
||||||
# be zeroed. The bytearray is zeroed by the context manager on exit.
|
# manager, but the str object will persist until GC. This is a
|
||||||
try:
|
# known limitation of Python's memory model.
|
||||||
response = json.loads(protected_plaintext.data)
|
plaintext_json = bytes(protected_plaintext.data).decode('utf-8')
|
||||||
except (json.JSONDecodeError, UnicodeDecodeError) as e:
|
del plaintext_bytes # drop immutable bytes ref; secure copy already zeroed
|
||||||
raise ValueError(f"Decrypted response is not valid JSON: {e}")
|
|
||||||
del plaintext_bytes
|
|
||||||
# AES key automatically zeroed here
|
# AES key automatically zeroed here
|
||||||
else:
|
else:
|
||||||
logger.warning("Secure memory not available, using standard decryption")
|
logger.warning("Secure memory not available, using standard decryption")
|
||||||
|
|
@ -577,18 +538,19 @@ class SecureCompletionClient:
|
||||||
decryptor = cipher.decryptor()
|
decryptor = cipher.decryptor()
|
||||||
plaintext_bytes = decryptor.update(ciphertext) + decryptor.finalize()
|
plaintext_bytes = decryptor.update(ciphertext) + decryptor.finalize()
|
||||||
plaintext_size = len(plaintext_bytes)
|
plaintext_size = len(plaintext_bytes)
|
||||||
try:
|
plaintext_json = plaintext_bytes.decode('utf-8')
|
||||||
response = json.loads(plaintext_bytes)
|
|
||||||
except (json.JSONDecodeError, UnicodeDecodeError) as e:
|
|
||||||
raise ValueError(f"Decrypted response is not valid JSON: {e}")
|
|
||||||
del plaintext_bytes
|
del plaintext_bytes
|
||||||
|
|
||||||
except ValueError:
|
|
||||||
raise # Re-raise JSON parse errors without masking as SecurityError
|
|
||||||
except Exception:
|
except Exception:
|
||||||
# Don't leak specific decryption errors (timing attacks)
|
# Don't leak specific decryption errors (timing attacks)
|
||||||
raise SecurityError("Decryption failed: integrity check or authentication failed")
|
raise SecurityError("Decryption failed: integrity check or authentication failed")
|
||||||
|
|
||||||
|
# Parse JSON outside the crypto exception handler so format errors aren't hidden
|
||||||
|
try:
|
||||||
|
response = json.loads(plaintext_json)
|
||||||
|
except (json.JSONDecodeError, UnicodeDecodeError) as e:
|
||||||
|
raise ValueError(f"Decrypted response is not valid JSON: {e}")
|
||||||
|
|
||||||
# Add metadata for debugging
|
# Add metadata for debugging
|
||||||
if "_metadata" not in response:
|
if "_metadata" not in response:
|
||||||
response["_metadata"] = {}
|
response["_metadata"] = {}
|
||||||
|
|
@ -663,22 +625,13 @@ class SecureCompletionClient:
|
||||||
url = f"{self.router_url}/v1/chat/secure_completion"
|
url = f"{self.router_url}/v1/chat/secure_completion"
|
||||||
logger.debug("Target URL: %s", url)
|
logger.debug("Target URL: %s", url)
|
||||||
|
|
||||||
_RETRYABLE_STATUS_CODES = {429, 500, 502, 503, 504}
|
try:
|
||||||
last_exc: Exception = APIConnectionError("Request failed")
|
async with httpx.AsyncClient(timeout=60.0) as client:
|
||||||
|
response = await client.post(
|
||||||
for attempt in range(self.max_retries + 1):
|
url,
|
||||||
if attempt > 0:
|
headers=headers,
|
||||||
delay = 2 ** (attempt - 1) # 1s, 2s, 4s, …
|
content=encrypted_payload
|
||||||
logger.warning("Retrying request (attempt %d/%d) after %.1fs...", attempt, self.max_retries, delay)
|
)
|
||||||
await asyncio.sleep(delay)
|
|
||||||
|
|
||||||
try:
|
|
||||||
async with httpx.AsyncClient(timeout=60.0) as client:
|
|
||||||
response = await client.post(
|
|
||||||
url,
|
|
||||||
headers=headers,
|
|
||||||
content=encrypted_payload
|
|
||||||
)
|
|
||||||
|
|
||||||
logger.debug("HTTP Status: %d", response.status_code)
|
logger.debug("HTTP Status: %d", response.status_code)
|
||||||
|
|
||||||
|
|
@ -689,6 +642,7 @@ class SecureCompletionClient:
|
||||||
return decrypted_response
|
return decrypted_response
|
||||||
|
|
||||||
elif response.status_code == 400:
|
elif response.status_code == 400:
|
||||||
|
# Bad request
|
||||||
try:
|
try:
|
||||||
error = response.json()
|
error = response.json()
|
||||||
raise InvalidRequestError(
|
raise InvalidRequestError(
|
||||||
|
|
@ -700,6 +654,7 @@ class SecureCompletionClient:
|
||||||
raise InvalidRequestError("Bad request: Invalid response format")
|
raise InvalidRequestError("Bad request: Invalid response format")
|
||||||
|
|
||||||
elif response.status_code == 401:
|
elif response.status_code == 401:
|
||||||
|
# Unauthorized - authentication failed
|
||||||
try:
|
try:
|
||||||
error = response.json()
|
error = response.json()
|
||||||
error_message = error.get('detail', 'Invalid API key or authentication failed')
|
error_message = error.get('detail', 'Invalid API key or authentication failed')
|
||||||
|
|
@ -712,6 +667,7 @@ class SecureCompletionClient:
|
||||||
raise AuthenticationError("Invalid API key or authentication failed")
|
raise AuthenticationError("Invalid API key or authentication failed")
|
||||||
|
|
||||||
elif response.status_code == 403:
|
elif response.status_code == 403:
|
||||||
|
# Forbidden - model not allowed for security tier
|
||||||
try:
|
try:
|
||||||
error = response.json()
|
error = response.json()
|
||||||
raise ForbiddenError(
|
raise ForbiddenError(
|
||||||
|
|
@ -723,6 +679,7 @@ class SecureCompletionClient:
|
||||||
raise ForbiddenError("Forbidden: Model not allowed for the requested security tier")
|
raise ForbiddenError("Forbidden: Model not allowed for the requested security tier")
|
||||||
|
|
||||||
elif response.status_code == 404:
|
elif response.status_code == 404:
|
||||||
|
# Endpoint not found
|
||||||
try:
|
try:
|
||||||
error = response.json()
|
error = response.json()
|
||||||
raise APIError(
|
raise APIError(
|
||||||
|
|
@ -733,47 +690,44 @@ class SecureCompletionClient:
|
||||||
except (json.JSONDecodeError, ValueError):
|
except (json.JSONDecodeError, ValueError):
|
||||||
raise APIError("Endpoint not found: Secure inference not enabled")
|
raise APIError("Endpoint not found: Secure inference not enabled")
|
||||||
|
|
||||||
elif response.status_code in _RETRYABLE_STATUS_CODES:
|
elif response.status_code == 429:
|
||||||
|
# Rate limit exceeded
|
||||||
try:
|
try:
|
||||||
error = response.json()
|
error = response.json()
|
||||||
if not isinstance(error, dict):
|
raise RateLimitError(
|
||||||
error = {"detail": "unknown"}
|
f"Rate limit exceeded: {error.get('detail', 'Too many requests')}",
|
||||||
detail_msg = error.get("detail", "unknown")
|
|
||||||
except (json.JSONDecodeError, ValueError):
|
|
||||||
error = {}
|
|
||||||
detail_msg = "unknown"
|
|
||||||
|
|
||||||
if response.status_code == 429:
|
|
||||||
last_exc = RateLimitError(
|
|
||||||
f"Rate limit exceeded: {detail_msg}",
|
|
||||||
status_code=429,
|
status_code=429,
|
||||||
error_details=error
|
error_details=error
|
||||||
)
|
)
|
||||||
elif response.status_code == 500:
|
except (json.JSONDecodeError, ValueError):
|
||||||
last_exc = ServerError(
|
raise RateLimitError("Rate limit exceeded: Too many requests")
|
||||||
f"Server error: {detail_msg}",
|
|
||||||
|
elif response.status_code == 500:
|
||||||
|
# Server error
|
||||||
|
try:
|
||||||
|
error = response.json()
|
||||||
|
raise ServerError(
|
||||||
|
f"Server error: {error.get('detail', 'Internal server error')}",
|
||||||
status_code=500,
|
status_code=500,
|
||||||
error_details=error
|
error_details=error
|
||||||
)
|
)
|
||||||
elif response.status_code == 503:
|
except (json.JSONDecodeError, ValueError):
|
||||||
last_exc = ServiceUnavailableError(
|
raise ServerError("Server error: Internal server error")
|
||||||
f"Service unavailable: {detail_msg}",
|
|
||||||
|
elif response.status_code == 503:
|
||||||
|
# Service unavailable - inference backend is down
|
||||||
|
try:
|
||||||
|
error = response.json()
|
||||||
|
raise ServiceUnavailableError(
|
||||||
|
f"Service unavailable: {error.get('detail', 'Inference backend is unavailable')}",
|
||||||
status_code=503,
|
status_code=503,
|
||||||
error_details=error
|
error_details=error
|
||||||
)
|
)
|
||||||
else:
|
except (json.JSONDecodeError, ValueError):
|
||||||
last_exc = APIError(
|
raise ServiceUnavailableError("Service unavailable: Inference backend is unavailable")
|
||||||
f"Unexpected status code: {response.status_code} {detail_msg}",
|
|
||||||
status_code=response.status_code,
|
|
||||||
error_details=error
|
|
||||||
)
|
|
||||||
|
|
||||||
if attempt < self.max_retries:
|
|
||||||
logger.warning("Got retryable status %d: %s", response.status_code, detail_msg)
|
|
||||||
continue
|
|
||||||
raise last_exc
|
|
||||||
|
|
||||||
else:
|
else:
|
||||||
|
# Unexpected status code
|
||||||
try:
|
try:
|
||||||
unexp_detail = response.json()
|
unexp_detail = response.json()
|
||||||
if not isinstance(unexp_detail, dict):
|
if not isinstance(unexp_detail, dict):
|
||||||
|
|
@ -786,17 +740,12 @@ class SecureCompletionClient:
|
||||||
status_code=response.status_code
|
status_code=response.status_code
|
||||||
)
|
)
|
||||||
|
|
||||||
except httpx.NetworkError as e:
|
except httpx.NetworkError as e:
|
||||||
last_exc = APIConnectionError(f"Failed to connect to router: {e}")
|
raise APIConnectionError(f"Failed to connect to router: {e}")
|
||||||
if attempt < self.max_retries:
|
except (SecurityError, APIError, AuthenticationError, InvalidRequestError, ForbiddenError, RateLimitError, ServerError, ServiceUnavailableError, APIConnectionError):
|
||||||
logger.warning("Network error on attempt %d: %s", attempt, e)
|
raise # Re-raise known exceptions
|
||||||
continue
|
except Exception as e:
|
||||||
raise last_exc
|
raise Exception(f"Request failed: {e}")
|
||||||
except (SecurityError, APIError, AuthenticationError, InvalidRequestError, ForbiddenError, RateLimitError, ServerError, ServiceUnavailableError, APIConnectionError):
|
|
||||||
raise # Non-retryable — propagate immediately
|
|
||||||
except Exception:
|
|
||||||
logger.exception("Unexpected error in send_secure_request")
|
|
||||||
raise APIConnectionError("Request failed due to an unexpected error")
|
|
||||||
|
|
||||||
def _validate_rsa_key(self, key, key_type: str = "private") -> None:
|
def _validate_rsa_key(self, key, key_type: str = "private") -> None:
|
||||||
"""
|
"""
|
||||||
|
|
|
||||||
|
|
@ -51,6 +51,6 @@ try:
|
||||||
except ImportError:
|
except ImportError:
|
||||||
pass
|
pass
|
||||||
|
|
||||||
__version__ = "0.2.7"
|
__version__ = "0.2.4"
|
||||||
__author__ = "NOMYO AI"
|
__author__ = "NOMYO AI"
|
||||||
__license__ = "Apache-2.0"
|
__license__ = "Apache-2.0"
|
||||||
|
|
|
||||||
|
|
@ -52,7 +52,7 @@ class SecureChatCompletion:
|
||||||
```
|
```
|
||||||
"""
|
"""
|
||||||
|
|
||||||
def __init__(self, base_url: str = "https://api.nomyo.ai", allow_http: bool = False, api_key: Optional[str] = None, secure_memory: bool = True, key_dir: Optional[str] = None, max_retries: int = 2):
|
def __init__(self, base_url: str = "https://api.nomyo.ai", allow_http: bool = False, api_key: Optional[str] = None, secure_memory: bool = True, key_dir: Optional[str] = None):
|
||||||
"""
|
"""
|
||||||
Initialize the secure chat completion client.
|
Initialize the secure chat completion client.
|
||||||
|
|
||||||
|
|
@ -68,10 +68,8 @@ class SecureChatCompletion:
|
||||||
Set to False for testing or when security is not required.
|
Set to False for testing or when security is not required.
|
||||||
key_dir: Directory to load/save RSA keys. If None, ephemeral keys are
|
key_dir: Directory to load/save RSA keys. If None, ephemeral keys are
|
||||||
generated in memory for this session only.
|
generated in memory for this session only.
|
||||||
max_retries: Number of retries on retryable errors (429, 500, 502, 503, 504,
|
|
||||||
network errors). Uses exponential backoff. Default 2.
|
|
||||||
"""
|
"""
|
||||||
self.client = SecureCompletionClient(router_url=base_url, allow_http=allow_http, secure_memory=secure_memory, max_retries=max_retries)
|
self.client = SecureCompletionClient(router_url=base_url, allow_http=allow_http, secure_memory=secure_memory)
|
||||||
self._keys_initialized = False
|
self._keys_initialized = False
|
||||||
self._keys_lock = asyncio.Lock()
|
self._keys_lock = asyncio.Lock()
|
||||||
self.api_key = api_key
|
self.api_key = api_key
|
||||||
|
|
|
||||||
|
|
@ -1,10 +1,10 @@
|
||||||
[build-system]
|
[build-system]
|
||||||
requires = ["hatchling>=1.0.0", "wheel"]
|
requires = ["setuptools>=68", "wheel", "Cython>=3.0"]
|
||||||
build-backend = "hatchling.build"
|
build-backend = "setuptools.build_meta"
|
||||||
|
|
||||||
[project]
|
[project]
|
||||||
name = "nomyo"
|
name = "nomyo"
|
||||||
version = "0.2.7"
|
version = "0.2.4"
|
||||||
description = "OpenAI-compatible secure chat client with end-to-end encryption for NOMYO Inference Endpoints"
|
description = "OpenAI-compatible secure chat client with end-to-end encryption for NOMYO Inference Endpoints"
|
||||||
authors = [
|
authors = [
|
||||||
{name = "NOMYO.AI", email = "ichi@nomyo.ai"},
|
{name = "NOMYO.AI", email = "ichi@nomyo.ai"},
|
||||||
|
|
@ -46,8 +46,5 @@ Documentation = "https://bitfreedom.net/code/nomyo-ai/nomyo/wiki/NOMYO-Secure-Cl
|
||||||
Repository = "https://bitfreedom.net/code/nomyo-ai/nomyo"
|
Repository = "https://bitfreedom.net/code/nomyo-ai/nomyo"
|
||||||
Issues = "https://bitfreedom.net/code/nomyo-ai/nomyo/issues"
|
Issues = "https://bitfreedom.net/code/nomyo-ai/nomyo/issues"
|
||||||
|
|
||||||
[tool.hatch.build.targets.wheel]
|
[tool.setuptools.packages.find]
|
||||||
packages = ["nomyo"]
|
include = ["nomyo*"]
|
||||||
|
|
||||||
[tool.hatch.build.targets.sdist]
|
|
||||||
exclude = ["test/", "build.sh", "dist/"]
|
|
||||||
|
|
|
||||||
31
setup.py
Normal file
31
setup.py
Normal file
|
|
@ -0,0 +1,31 @@
|
||||||
|
from setuptools import setup
|
||||||
|
from setuptools.command.build_py import build_py as _build_py
|
||||||
|
from Cython.Build import cythonize
|
||||||
|
|
||||||
|
# Modules compiled to .so — exclude their .py source from the wheel
|
||||||
|
COMPILED_MODULES = {"nomyo", "SecureCompletionClient", "SecureMemory"}
|
||||||
|
|
||||||
|
|
||||||
|
class BuildPyNoPy(_build_py):
|
||||||
|
"""Skip copying .py source files for cythonized modules."""
|
||||||
|
|
||||||
|
def find_package_modules(self, package, package_dir):
|
||||||
|
modules = super().find_package_modules(package, package_dir)
|
||||||
|
return [
|
||||||
|
(pkg, mod, path)
|
||||||
|
for pkg, mod, path in modules
|
||||||
|
if not (pkg == "nomyo" and mod in COMPILED_MODULES)
|
||||||
|
]
|
||||||
|
|
||||||
|
|
||||||
|
setup(
|
||||||
|
ext_modules=cythonize(
|
||||||
|
[
|
||||||
|
"nomyo/nomyo.py",
|
||||||
|
"nomyo/SecureCompletionClient.py",
|
||||||
|
"nomyo/SecureMemory.py",
|
||||||
|
],
|
||||||
|
compiler_directives={"language_level": "3"},
|
||||||
|
),
|
||||||
|
cmdclass={"build_py": BuildPyNoPy},
|
||||||
|
)
|
||||||
Loading…
Add table
Add a link
Reference in a new issue