feat: add automatic client retry logic with exponential backoff
All checks were successful
Publish to PyPI / publish (push) Successful in 16s

This commit is contained in:
Alpha Nerd 2026-04-15 12:08:21 +02:00
parent 5040d181d2
commit 93adb6c45c
Signed by: alpha-nerd
SSH key fingerprint: SHA256:QkkAgVoYi9TQ0UKPkiKSfnerZy2h4qhi3SVPXJmBN+M
7 changed files with 87 additions and 66 deletions

View file

@ -349,7 +349,8 @@ SecureChatCompletion(
base_url: str = "https://api.nomyo.ai",
allow_http: bool = False,
api_key: Optional[str] = None,
secure_memory: bool = True
secure_memory: bool = True,
max_retries: int = 2
)
```
@ -359,6 +360,7 @@ SecureChatCompletion(
- `allow_http`: Allow HTTP connections (ONLY for local development, never in production)
- `api_key`: Optional API key for bearer authentication
- `secure_memory`: Enable secure memory protection (default: True)
- `max_retries`: Number of retries on retryable errors (429, 500, 502, 503, 504, network errors). Uses exponential backoff. Default: 2
#### Methods
@ -370,7 +372,7 @@ SecureChatCompletion(
#### Constructor
```python
SecureCompletionClient(router_url: str = "https://api.nomyo.ai")
SecureCompletionClient(router_url: str = "https://api.nomyo.ai", allow_http: bool = False, secure_memory: bool = True, max_retries: int = 2)
```
#### Methods

View file

@ -11,7 +11,8 @@ SecureChatCompletion(
base_url: str = "https://api.nomyo.ai",
allow_http: bool = False,
api_key: Optional[str] = None,
secure_memory: bool = True
secure_memory: bool = True,
max_retries: int = 2
)
```
@ -21,6 +22,7 @@ SecureChatCompletion(
- `allow_http` (bool): Allow HTTP connections (ONLY for local development, never in production)
- `api_key` (Optional[str]): Optional API key for bearer authentication
- `secure_memory` (bool): Enable secure memory protection (default: True)
- `max_retries` (int): Number of retries on retryable errors (429, 500, 502, 503, 504, network errors). Uses exponential backoff. Default: 2
### Methods
@ -92,13 +94,18 @@ The `SecureCompletionClient` class handles the underlying encryption, key manage
### Constructor
```python
SecureCompletionClient(router_url: str = "https://api.nomyo.ai", allow_http: bool = False)
SecureCompletionClient(
router_url: str = "https://api.nomyo.ai",
allow_http: bool = False,
secure_memory: bool = True,
max_retries: int = 2
)
```
**Parameters:**
- `router_url` (str): Base URL of the NOMYO Router (must use HTTPS for production)
- `allow_http` (bool): Allow HTTP connections (ONLY for local development, never in production)
- `secure_memory` (bool): Enable secure memory protection (default: True)
- `max_retries` (int): Number of retries on retryable errors (429, 500, 502, 503, 504, network errors). Uses exponential backoff. Default: 2
### Methods

View file

@ -48,20 +48,14 @@ HTTP/1.1 503 Service Unavailable
- **Implement exponential backoff** when you receive a `429` response. Start with a short delay (e.g. 500 ms) and double it on each subsequent failure, up to a reasonable maximum.
- **Monitor for `503` responses** — repeated occurrences indicate that your usage pattern is triggering the abuse threshold. Refactor your request logic before the cool-down expires.
## Example: Exponential Backoff
## Retry Behaviour
The client retries automatically on `429`, `500`, `502`, `503`, `504`, and network errors using exponential backoff (1 s, 2 s, …). The default is **2 retries**. You can raise or disable this per client:
```python
import asyncio
import httpx
# More retries for high-throughput workloads
client = SecureChatCompletion(api_key="...", max_retries=5)
async def request_with_backoff(client, *args, max_retries=5, **kwargs):
delay = 0.5
for attempt in range(max_retries):
response = await client.create(*args, **kwargs)
if response.status_code == 429:
await asyncio.sleep(delay)
delay = min(delay * 2, 30)
continue
return response
raise RuntimeError("Rate limit exceeded after maximum retries")
# Disable retries entirely
client = SecureChatCompletion(api_key="...", max_retries=0)
```

View file

@ -1,4 +1,4 @@
import ctypes, json, base64, urllib.parse, httpx, os, secrets, sys, warnings, logging
import asyncio, ctypes, json, base64, urllib.parse, httpx, os, secrets, sys, warnings, logging
from typing import Dict, Any, Optional
from cryptography.hazmat.primitives import serialization, hashes
from cryptography.hazmat.primitives.asymmetric import rsa, padding
@ -76,7 +76,7 @@ class SecureCompletionClient:
- Response parsing
"""
def __init__(self, router_url: str = "https://api.nomyo.ai", allow_http: bool = False, secure_memory: bool = True):
def __init__(self, router_url: str = "https://api.nomyo.ai", allow_http: bool = False, secure_memory: bool = True, max_retries: int = 2):
"""
Initialize the secure completion client.
@ -84,6 +84,9 @@ class SecureCompletionClient:
router_url: Base URL of the NOMYO Router (must use HTTPS for production)
allow_http: Allow HTTP connections (ONLY for local development, never in production)
secure_memory: Whether to use secure memory operations for this instance.
max_retries: Number of retries on retryable errors (429, 500, 502, 503, 504,
network errors). Uses exponential backoff. Default 2, matching
the OpenAI Python SDK default.
"""
self.router_url = router_url.rstrip('/')
self.private_key = None
@ -91,6 +94,7 @@ class SecureCompletionClient:
self.key_size = 4096 # RSA key size
self.allow_http = allow_http # Store for use in fetch_server_public_key
self._use_secure_memory = _SECURE_MEMORY_AVAILABLE and secure_memory
self.max_retries = max_retries
# Validate HTTPS for security
if not self.router_url.startswith("https://"):
@ -659,13 +663,22 @@ class SecureCompletionClient:
url = f"{self.router_url}/v1/chat/secure_completion"
logger.debug("Target URL: %s", url)
try:
async with httpx.AsyncClient(timeout=60.0) as client:
response = await client.post(
url,
headers=headers,
content=encrypted_payload
)
_RETRYABLE_STATUS_CODES = {429, 500, 502, 503, 504}
last_exc: Exception = APIConnectionError("Request failed")
for attempt in range(self.max_retries + 1):
if attempt > 0:
delay = 2 ** (attempt - 1) # 1s, 2s, 4s, …
logger.warning("Retrying request (attempt %d/%d) after %.1fs...", attempt, self.max_retries, delay)
await asyncio.sleep(delay)
try:
async with httpx.AsyncClient(timeout=60.0) as client:
response = await client.post(
url,
headers=headers,
content=encrypted_payload
)
logger.debug("HTTP Status: %d", response.status_code)
@ -676,7 +689,6 @@ class SecureCompletionClient:
return decrypted_response
elif response.status_code == 400:
# Bad request
try:
error = response.json()
raise InvalidRequestError(
@ -688,7 +700,6 @@ class SecureCompletionClient:
raise InvalidRequestError("Bad request: Invalid response format")
elif response.status_code == 401:
# Unauthorized - authentication failed
try:
error = response.json()
error_message = error.get('detail', 'Invalid API key or authentication failed')
@ -701,7 +712,6 @@ class SecureCompletionClient:
raise AuthenticationError("Invalid API key or authentication failed")
elif response.status_code == 403:
# Forbidden - model not allowed for security tier
try:
error = response.json()
raise ForbiddenError(
@ -713,7 +723,6 @@ class SecureCompletionClient:
raise ForbiddenError("Forbidden: Model not allowed for the requested security tier")
elif response.status_code == 404:
# Endpoint not found
try:
error = response.json()
raise APIError(
@ -724,44 +733,47 @@ class SecureCompletionClient:
except (json.JSONDecodeError, ValueError):
raise APIError("Endpoint not found: Secure inference not enabled")
elif response.status_code == 429:
# Rate limit exceeded
elif response.status_code in _RETRYABLE_STATUS_CODES:
try:
error = response.json()
raise RateLimitError(
f"Rate limit exceeded: {error.get('detail', 'Too many requests')}",
if not isinstance(error, dict):
error = {"detail": "unknown"}
detail_msg = error.get("detail", "unknown")
except (json.JSONDecodeError, ValueError):
error = {}
detail_msg = "unknown"
if response.status_code == 429:
last_exc = RateLimitError(
f"Rate limit exceeded: {detail_msg}",
status_code=429,
error_details=error
)
except (json.JSONDecodeError, ValueError):
raise RateLimitError("Rate limit exceeded: Too many requests")
elif response.status_code == 500:
# Server error
try:
error = response.json()
raise ServerError(
f"Server error: {error.get('detail', 'Internal server error')}",
elif response.status_code == 500:
last_exc = ServerError(
f"Server error: {detail_msg}",
status_code=500,
error_details=error
)
except (json.JSONDecodeError, ValueError):
raise ServerError("Server error: Internal server error")
elif response.status_code == 503:
# Service unavailable - inference backend is down
try:
error = response.json()
raise ServiceUnavailableError(
f"Service unavailable: {error.get('detail', 'Inference backend is unavailable')}",
elif response.status_code == 503:
last_exc = ServiceUnavailableError(
f"Service unavailable: {detail_msg}",
status_code=503,
error_details=error
)
except (json.JSONDecodeError, ValueError):
raise ServiceUnavailableError("Service unavailable: Inference backend is unavailable")
else:
last_exc = APIError(
f"Unexpected status code: {response.status_code} {detail_msg}",
status_code=response.status_code,
error_details=error
)
if attempt < self.max_retries:
logger.warning("Got retryable status %d: %s", response.status_code, detail_msg)
continue
raise last_exc
else:
# Unexpected status code
try:
unexp_detail = response.json()
if not isinstance(unexp_detail, dict):
@ -774,13 +786,17 @@ class SecureCompletionClient:
status_code=response.status_code
)
except httpx.NetworkError as e:
raise APIConnectionError(f"Failed to connect to router: {e}")
except (SecurityError, APIError, AuthenticationError, InvalidRequestError, ForbiddenError, RateLimitError, ServerError, ServiceUnavailableError, APIConnectionError):
raise # Re-raise known exceptions
except Exception:
logger.exception("Unexpected error in send_secure_request")
raise APIConnectionError("Request failed due to an unexpected error")
except httpx.NetworkError as e:
last_exc = APIConnectionError(f"Failed to connect to router: {e}")
if attempt < self.max_retries:
logger.warning("Network error on attempt %d: %s", attempt, e)
continue
raise last_exc
except (SecurityError, APIError, AuthenticationError, InvalidRequestError, ForbiddenError, RateLimitError, ServerError, ServiceUnavailableError, APIConnectionError):
raise # Non-retryable — propagate immediately
except Exception:
logger.exception("Unexpected error in send_secure_request")
raise APIConnectionError("Request failed due to an unexpected error")
def _validate_rsa_key(self, key, key_type: str = "private") -> None:
"""

View file

@ -51,6 +51,6 @@ try:
except ImportError:
pass
__version__ = "0.2.5"
__version__ = "0.2.6"
__author__ = "NOMYO AI"
__license__ = "Apache-2.0"

View file

@ -52,7 +52,7 @@ class SecureChatCompletion:
```
"""
def __init__(self, base_url: str = "https://api.nomyo.ai", allow_http: bool = False, api_key: Optional[str] = None, secure_memory: bool = True, key_dir: Optional[str] = None):
def __init__(self, base_url: str = "https://api.nomyo.ai", allow_http: bool = False, api_key: Optional[str] = None, secure_memory: bool = True, key_dir: Optional[str] = None, max_retries: int = 2):
"""
Initialize the secure chat completion client.
@ -68,8 +68,10 @@ class SecureChatCompletion:
Set to False for testing or when security is not required.
key_dir: Directory to load/save RSA keys. If None, ephemeral keys are
generated in memory for this session only.
max_retries: Number of retries on retryable errors (429, 500, 502, 503, 504,
network errors). Uses exponential backoff. Default 2.
"""
self.client = SecureCompletionClient(router_url=base_url, allow_http=allow_http, secure_memory=secure_memory)
self.client = SecureCompletionClient(router_url=base_url, allow_http=allow_http, secure_memory=secure_memory, max_retries=max_retries)
self._keys_initialized = False
self._keys_lock = asyncio.Lock()
self.api_key = api_key

View file

@ -4,7 +4,7 @@ build-backend = "hatchling.build"
[project]
name = "nomyo"
version = "0.2.5"
version = "0.2.6"
description = "OpenAI-compatible secure chat client with end-to-end encryption for NOMYO Inference Endpoints"
authors = [
{name = "NOMYO.AI", email = "ichi@nomyo.ai"},