feat: add automatic client retry logic with exponential backoff
All checks were successful
Publish to PyPI / publish (push) Successful in 16s

This commit is contained in:
Alpha Nerd 2026-04-15 12:08:21 +02:00
parent 5040d181d2
commit 93adb6c45c
Signed by: alpha-nerd
SSH key fingerprint: SHA256:QkkAgVoYi9TQ0UKPkiKSfnerZy2h4qhi3SVPXJmBN+M
7 changed files with 87 additions and 66 deletions

View file

@ -349,7 +349,8 @@ SecureChatCompletion(
base_url: str = "https://api.nomyo.ai", base_url: str = "https://api.nomyo.ai",
allow_http: bool = False, allow_http: bool = False,
api_key: Optional[str] = None, api_key: Optional[str] = None,
secure_memory: bool = True secure_memory: bool = True,
max_retries: int = 2
) )
``` ```
@ -359,6 +360,7 @@ SecureChatCompletion(
- `allow_http`: Allow HTTP connections (ONLY for local development, never in production) - `allow_http`: Allow HTTP connections (ONLY for local development, never in production)
- `api_key`: Optional API key for bearer authentication - `api_key`: Optional API key for bearer authentication
- `secure_memory`: Enable secure memory protection (default: True) - `secure_memory`: Enable secure memory protection (default: True)
- `max_retries`: Number of retries on retryable errors (429, 500, 502, 503, 504, network errors), using exponential backoff (default: 2)
#### Methods #### Methods
@ -370,7 +372,7 @@ SecureChatCompletion(
#### Constructor #### Constructor
```python ```python
SecureCompletionClient(router_url: str = "https://api.nomyo.ai") SecureCompletionClient(router_url: str = "https://api.nomyo.ai", allow_http: bool = False, max_retries: int = 2)
``` ```
#### Methods #### Methods

View file

@ -11,7 +11,8 @@ SecureChatCompletion(
base_url: str = "https://api.nomyo.ai", base_url: str = "https://api.nomyo.ai",
allow_http: bool = False, allow_http: bool = False,
api_key: Optional[str] = None, api_key: Optional[str] = None,
secure_memory: bool = True secure_memory: bool = True,
max_retries: int = 2
) )
``` ```
@ -21,6 +22,7 @@ SecureChatCompletion(
- `allow_http` (bool): Allow HTTP connections (ONLY for local development, never in production) - `allow_http` (bool): Allow HTTP connections (ONLY for local development, never in production)
- `api_key` (Optional[str]): Optional API key for bearer authentication - `api_key` (Optional[str]): Optional API key for bearer authentication
- `secure_memory` (bool): Enable secure memory protection (default: True) - `secure_memory` (bool): Enable secure memory protection (default: True)
- `max_retries` (int): Number of retries on retryable errors (429, 500, 502, 503, 504, network errors). Uses exponential backoff. Default: 2
### Methods ### Methods
@ -92,13 +94,18 @@ The `SecureCompletionClient` class handles the underlying encryption, key manage
### Constructor ### Constructor
```python ```python
SecureCompletionClient(router_url: str = "https://api.nomyo.ai", allow_http: bool = False) SecureCompletionClient(
router_url: str = "https://api.nomyo.ai",
allow_http: bool = False,
max_retries: int = 2
)
``` ```
**Parameters:** **Parameters:**
- `router_url` (str): Base URL of the NOMYO Router (must use HTTPS for production) - `router_url` (str): Base URL of the NOMYO Router (must use HTTPS for production)
- `allow_http` (bool): Allow HTTP connections (ONLY for local development, never in production) - `allow_http` (bool): Allow HTTP connections (ONLY for local development, never in production)
- `max_retries` (int): Number of retries on retryable errors (429, 500, 502, 503, 504, network errors). Uses exponential backoff. Default: 2
### Methods ### Methods

View file

@ -48,20 +48,14 @@ HTTP/1.1 503 Service Unavailable
- **Implement exponential backoff** when you receive a `429` response. Start with a short delay (e.g. 500 ms) and double it on each subsequent failure, up to a reasonable maximum. - **Implement exponential backoff** when you receive a `429` response. Start with a short delay (e.g. 500 ms) and double it on each subsequent failure, up to a reasonable maximum.
- **Monitor for `503` responses** — repeated occurrences indicate that your usage pattern is triggering the abuse threshold. Refactor your request logic before the cool-down expires. - **Monitor for `503` responses** — repeated occurrences indicate that your usage pattern is triggering the abuse threshold. Refactor your request logic before the cool-down expires.
## Example: Exponential Backoff ## Retry Behaviour
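The client automatically retries requests that fail with a `429`, `500`, `502`, `503`, or `504` response or with a network error, using exponential backoff (1 s, 2 s, 4 s, …, doubling before each further retry). The default is **2 retries**. You can raise this limit or disable retries entirely on a per-client basis: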
```python ```python
import asyncio # More retries for high-throughput workloads
import httpx client = SecureChatCompletion(api_key="...", max_retries=5)
async def request_with_backoff(client, *args, max_retries=5, **kwargs): # Disable retries entirely
delay = 0.5 client = SecureChatCompletion(api_key="...", max_retries=0)
for attempt in range(max_retries):
response = await client.create(*args, **kwargs)
if response.status_code == 429:
await asyncio.sleep(delay)
delay = min(delay * 2, 30)
continue
return response
raise RuntimeError("Rate limit exceeded after maximum retries")
``` ```

View file

@ -1,4 +1,4 @@
import ctypes, json, base64, urllib.parse, httpx, os, secrets, sys, warnings, logging import asyncio, ctypes, json, base64, urllib.parse, httpx, os, secrets, sys, warnings, logging
from typing import Dict, Any, Optional from typing import Dict, Any, Optional
from cryptography.hazmat.primitives import serialization, hashes from cryptography.hazmat.primitives import serialization, hashes
from cryptography.hazmat.primitives.asymmetric import rsa, padding from cryptography.hazmat.primitives.asymmetric import rsa, padding
@ -76,7 +76,7 @@ class SecureCompletionClient:
- Response parsing - Response parsing
""" """
def __init__(self, router_url: str = "https://api.nomyo.ai", allow_http: bool = False, secure_memory: bool = True): def __init__(self, router_url: str = "https://api.nomyo.ai", allow_http: bool = False, secure_memory: bool = True, max_retries: int = 2):
""" """
Initialize the secure completion client. Initialize the secure completion client.
@ -84,6 +84,9 @@ class SecureCompletionClient:
router_url: Base URL of the NOMYO Router (must use HTTPS for production) router_url: Base URL of the NOMYO Router (must use HTTPS for production)
allow_http: Allow HTTP connections (ONLY for local development, never in production) allow_http: Allow HTTP connections (ONLY for local development, never in production)
secure_memory: Whether to use secure memory operations for this instance. secure_memory: Whether to use secure memory operations for this instance.
max_retries: Number of retries on retryable errors (429, 500, 502, 503, 504,
network errors). Uses exponential backoff. Default 2, matching
the OpenAI Python SDK default.
""" """
self.router_url = router_url.rstrip('/') self.router_url = router_url.rstrip('/')
self.private_key = None self.private_key = None
@ -91,6 +94,7 @@ class SecureCompletionClient:
self.key_size = 4096 # RSA key size self.key_size = 4096 # RSA key size
self.allow_http = allow_http # Store for use in fetch_server_public_key self.allow_http = allow_http # Store for use in fetch_server_public_key
self._use_secure_memory = _SECURE_MEMORY_AVAILABLE and secure_memory self._use_secure_memory = _SECURE_MEMORY_AVAILABLE and secure_memory
self.max_retries = max_retries
# Validate HTTPS for security # Validate HTTPS for security
if not self.router_url.startswith("https://"): if not self.router_url.startswith("https://"):
@ -659,6 +663,15 @@ class SecureCompletionClient:
url = f"{self.router_url}/v1/chat/secure_completion" url = f"{self.router_url}/v1/chat/secure_completion"
logger.debug("Target URL: %s", url) logger.debug("Target URL: %s", url)
_RETRYABLE_STATUS_CODES = {429, 500, 502, 503, 504}
last_exc: Exception = APIConnectionError("Request failed")
for attempt in range(self.max_retries + 1):
if attempt > 0:
delay = 2 ** (attempt - 1) # 1s, 2s, 4s, …
logger.warning("Retrying request (attempt %d/%d) after %.1fs...", attempt, self.max_retries, delay)
await asyncio.sleep(delay)
try: try:
async with httpx.AsyncClient(timeout=60.0) as client: async with httpx.AsyncClient(timeout=60.0) as client:
response = await client.post( response = await client.post(
@ -676,7 +689,6 @@ class SecureCompletionClient:
return decrypted_response return decrypted_response
elif response.status_code == 400: elif response.status_code == 400:
# Bad request
try: try:
error = response.json() error = response.json()
raise InvalidRequestError( raise InvalidRequestError(
@ -688,7 +700,6 @@ class SecureCompletionClient:
raise InvalidRequestError("Bad request: Invalid response format") raise InvalidRequestError("Bad request: Invalid response format")
elif response.status_code == 401: elif response.status_code == 401:
# Unauthorized - authentication failed
try: try:
error = response.json() error = response.json()
error_message = error.get('detail', 'Invalid API key or authentication failed') error_message = error.get('detail', 'Invalid API key or authentication failed')
@ -701,7 +712,6 @@ class SecureCompletionClient:
raise AuthenticationError("Invalid API key or authentication failed") raise AuthenticationError("Invalid API key or authentication failed")
elif response.status_code == 403: elif response.status_code == 403:
# Forbidden - model not allowed for security tier
try: try:
error = response.json() error = response.json()
raise ForbiddenError( raise ForbiddenError(
@ -713,7 +723,6 @@ class SecureCompletionClient:
raise ForbiddenError("Forbidden: Model not allowed for the requested security tier") raise ForbiddenError("Forbidden: Model not allowed for the requested security tier")
elif response.status_code == 404: elif response.status_code == 404:
# Endpoint not found
try: try:
error = response.json() error = response.json()
raise APIError( raise APIError(
@ -724,44 +733,47 @@ class SecureCompletionClient:
except (json.JSONDecodeError, ValueError): except (json.JSONDecodeError, ValueError):
raise APIError("Endpoint not found: Secure inference not enabled") raise APIError("Endpoint not found: Secure inference not enabled")
elif response.status_code == 429: elif response.status_code in _RETRYABLE_STATUS_CODES:
# Rate limit exceeded
try: try:
error = response.json() error = response.json()
raise RateLimitError( if not isinstance(error, dict):
f"Rate limit exceeded: {error.get('detail', 'Too many requests')}", error = {"detail": "unknown"}
detail_msg = error.get("detail", "unknown")
except (json.JSONDecodeError, ValueError):
error = {}
detail_msg = "unknown"
if response.status_code == 429:
last_exc = RateLimitError(
f"Rate limit exceeded: {detail_msg}",
status_code=429, status_code=429,
error_details=error error_details=error
) )
except (json.JSONDecodeError, ValueError):
raise RateLimitError("Rate limit exceeded: Too many requests")
elif response.status_code == 500: elif response.status_code == 500:
# Server error last_exc = ServerError(
try: f"Server error: {detail_msg}",
error = response.json()
raise ServerError(
f"Server error: {error.get('detail', 'Internal server error')}",
status_code=500, status_code=500,
error_details=error error_details=error
) )
except (json.JSONDecodeError, ValueError):
raise ServerError("Server error: Internal server error")
elif response.status_code == 503: elif response.status_code == 503:
# Service unavailable - inference backend is down last_exc = ServiceUnavailableError(
try: f"Service unavailable: {detail_msg}",
error = response.json()
raise ServiceUnavailableError(
f"Service unavailable: {error.get('detail', 'Inference backend is unavailable')}",
status_code=503, status_code=503,
error_details=error error_details=error
) )
except (json.JSONDecodeError, ValueError): else:
raise ServiceUnavailableError("Service unavailable: Inference backend is unavailable") last_exc = APIError(
f"Unexpected status code: {response.status_code} {detail_msg}",
status_code=response.status_code,
error_details=error
)
if attempt < self.max_retries:
logger.warning("Got retryable status %d: %s", response.status_code, detail_msg)
continue
raise last_exc
else: else:
# Unexpected status code
try: try:
unexp_detail = response.json() unexp_detail = response.json()
if not isinstance(unexp_detail, dict): if not isinstance(unexp_detail, dict):
@ -775,9 +787,13 @@ class SecureCompletionClient:
) )
except httpx.NetworkError as e: except httpx.NetworkError as e:
raise APIConnectionError(f"Failed to connect to router: {e}") last_exc = APIConnectionError(f"Failed to connect to router: {e}")
if attempt < self.max_retries:
logger.warning("Network error on attempt %d: %s", attempt, e)
continue
raise last_exc
except (SecurityError, APIError, AuthenticationError, InvalidRequestError, ForbiddenError, RateLimitError, ServerError, ServiceUnavailableError, APIConnectionError): except (SecurityError, APIError, AuthenticationError, InvalidRequestError, ForbiddenError, RateLimitError, ServerError, ServiceUnavailableError, APIConnectionError):
raise # Re-raise known exceptions raise # Non-retryable — propagate immediately
except Exception: except Exception:
logger.exception("Unexpected error in send_secure_request") logger.exception("Unexpected error in send_secure_request")
raise APIConnectionError("Request failed due to an unexpected error") raise APIConnectionError("Request failed due to an unexpected error")

View file

@ -51,6 +51,6 @@ try:
except ImportError: except ImportError:
pass pass
__version__ = "0.2.5" __version__ = "0.2.6"
__author__ = "NOMYO AI" __author__ = "NOMYO AI"
__license__ = "Apache-2.0" __license__ = "Apache-2.0"

View file

@ -52,7 +52,7 @@ class SecureChatCompletion:
``` ```
""" """
def __init__(self, base_url: str = "https://api.nomyo.ai", allow_http: bool = False, api_key: Optional[str] = None, secure_memory: bool = True, key_dir: Optional[str] = None): def __init__(self, base_url: str = "https://api.nomyo.ai", allow_http: bool = False, api_key: Optional[str] = None, secure_memory: bool = True, key_dir: Optional[str] = None, max_retries: int = 2):
""" """
Initialize the secure chat completion client. Initialize the secure chat completion client.
@ -68,8 +68,10 @@ class SecureChatCompletion:
Set to False for testing or when security is not required. Set to False for testing or when security is not required.
key_dir: Directory to load/save RSA keys. If None, ephemeral keys are key_dir: Directory to load/save RSA keys. If None, ephemeral keys are
generated in memory for this session only. generated in memory for this session only.
max_retries: Number of retries on retryable errors (429, 500, 502, 503, 504,
network errors). Uses exponential backoff. Default 2.
""" """
self.client = SecureCompletionClient(router_url=base_url, allow_http=allow_http, secure_memory=secure_memory) self.client = SecureCompletionClient(router_url=base_url, allow_http=allow_http, secure_memory=secure_memory, max_retries=max_retries)
self._keys_initialized = False self._keys_initialized = False
self._keys_lock = asyncio.Lock() self._keys_lock = asyncio.Lock()
self.api_key = api_key self.api_key = api_key

View file

@ -4,7 +4,7 @@ build-backend = "hatchling.build"
[project] [project]
name = "nomyo" name = "nomyo"
version = "0.2.5" version = "0.2.6"
description = "OpenAI-compatible secure chat client with end-to-end encryption for NOMYO Inference Endpoints" description = "OpenAI-compatible secure chat client with end-to-end encryption for NOMYO Inference Endpoints"
authors = [ authors = [
{name = "NOMYO.AI", email = "ichi@nomyo.ai"}, {name = "NOMYO.AI", email = "ichi@nomyo.ai"},