feat: add automatic client retry logic with exponential backoff
All checks were successful
Publish to PyPI / publish (push) Successful in 16s
All checks were successful
Publish to PyPI / publish (push) Successful in 16s
This commit is contained in:
parent
5040d181d2
commit
93adb6c45c
7 changed files with 87 additions and 66 deletions
|
|
@ -349,7 +349,8 @@ SecureChatCompletion(
|
|||
base_url: str = "https://api.nomyo.ai",
|
||||
allow_http: bool = False,
|
||||
api_key: Optional[str] = None,
|
||||
secure_memory: bool = True
|
||||
secure_memory: bool = True,
|
||||
max_retries: int = 2
|
||||
)
|
||||
```
|
||||
|
||||
|
|
@ -359,6 +360,7 @@ SecureChatCompletion(
|
|||
- `allow_http`: Allow HTTP connections (ONLY for local development, never in production)
|
||||
- `api_key`: Optional API key for bearer authentication
|
||||
- `secure_memory`: Enable secure memory protection (default: True)
|
||||
- `max_retries`: Number of retries on retryable errors (429, 500, 502, 503, 504, network errors), using exponential backoff. Default: 2
|
||||
|
||||
#### Methods
|
||||
|
||||
|
|
@ -370,7 +372,7 @@ SecureChatCompletion(
|
|||
#### Constructor
|
||||
|
||||
```python
|
||||
SecureCompletionClient(router_url: str = "https://api.nomyo.ai")
|
||||
SecureCompletionClient(router_url: str = "https://api.nomyo.ai", allow_http: bool = False, max_retries: int = 2)
|
||||
```
|
||||
|
||||
#### Methods
|
||||
|
|
|
|||
|
|
@ -11,7 +11,8 @@ SecureChatCompletion(
|
|||
base_url: str = "https://api.nomyo.ai",
|
||||
allow_http: bool = False,
|
||||
api_key: Optional[str] = None,
|
||||
secure_memory: bool = True
|
||||
secure_memory: bool = True,
|
||||
max_retries: int = 2
|
||||
)
|
||||
```
|
||||
|
||||
|
|
@ -21,6 +22,7 @@ SecureChatCompletion(
|
|||
- `allow_http` (bool): Allow HTTP connections (ONLY for local development, never in production)
|
||||
- `api_key` (Optional[str]): Optional API key for bearer authentication
|
||||
- `secure_memory` (bool): Enable secure memory protection (default: True)
|
||||
- `max_retries` (int): Number of retries on retryable errors (429, 500, 502, 503, 504, network errors). Uses exponential backoff. Default: 2
|
||||
|
||||
### Methods
|
||||
|
||||
|
|
@ -92,13 +94,18 @@ The `SecureCompletionClient` class handles the underlying encryption, key manage
|
|||
### Constructor
|
||||
|
||||
```python
|
||||
SecureCompletionClient(router_url: str = "https://api.nomyo.ai", allow_http: bool = False)
|
||||
SecureCompletionClient(
|
||||
router_url: str = "https://api.nomyo.ai",
|
||||
allow_http: bool = False,
|
||||
max_retries: int = 2
|
||||
)
|
||||
```
|
||||
|
||||
**Parameters:**
|
||||
|
||||
- `router_url` (str): Base URL of the NOMYO Router (must use HTTPS for production)
|
||||
- `allow_http` (bool): Allow HTTP connections (ONLY for local development, never in production)
|
||||
- `max_retries` (int): Number of retries on retryable errors (429, 500, 502, 503, 504, network errors). Uses exponential backoff. Default: 2
|
||||
|
||||
### Methods
|
||||
|
||||
|
|
|
|||
|
|
@ -48,20 +48,14 @@ HTTP/1.1 503 Service Unavailable
|
|||
- **Implement exponential backoff** when you receive a `429` response. Start with a short delay (e.g. 500 ms) and double it on each subsequent failure, up to a reasonable maximum.
|
||||
- **Monitor for `503` responses** — repeated occurrences indicate that your usage pattern is triggering the abuse threshold. Refactor your request logic before the cool-down expires.
|
||||
|
||||
## Example: Exponential Backoff
|
||||
## Retry Behaviour
|
||||
|
||||
The client automatically retries on `429`, `500`, `502`, `503`, `504`, and network errors using exponential backoff (1 s, 2 s, 4 s, …). The default is **2 retries**. You can increase the retry count or disable retries entirely per client:
|
||||
|
||||
```python
|
||||
import asyncio
|
||||
import httpx
|
||||
# More retries for high-throughput workloads
|
||||
client = SecureChatCompletion(api_key="...", max_retries=5)
|
||||
|
||||
async def request_with_backoff(client, *args, max_retries=5, **kwargs):
|
||||
delay = 0.5
|
||||
for attempt in range(max_retries):
|
||||
response = await client.create(*args, **kwargs)
|
||||
if response.status_code == 429:
|
||||
await asyncio.sleep(delay)
|
||||
delay = min(delay * 2, 30)
|
||||
continue
|
||||
return response
|
||||
raise RuntimeError("Rate limit exceeded after maximum retries")
|
||||
# Disable retries entirely
|
||||
client = SecureChatCompletion(api_key="...", max_retries=0)
|
||||
```
|
||||
|
|
|
|||
|
|
@ -1,4 +1,4 @@
|
|||
import ctypes, json, base64, urllib.parse, httpx, os, secrets, sys, warnings, logging
|
||||
import asyncio, ctypes, json, base64, urllib.parse, httpx, os, secrets, sys, warnings, logging
|
||||
from typing import Dict, Any, Optional
|
||||
from cryptography.hazmat.primitives import serialization, hashes
|
||||
from cryptography.hazmat.primitives.asymmetric import rsa, padding
|
||||
|
|
@ -76,7 +76,7 @@ class SecureCompletionClient:
|
|||
- Response parsing
|
||||
"""
|
||||
|
||||
def __init__(self, router_url: str = "https://api.nomyo.ai", allow_http: bool = False, secure_memory: bool = True):
|
||||
def __init__(self, router_url: str = "https://api.nomyo.ai", allow_http: bool = False, secure_memory: bool = True, max_retries: int = 2):
|
||||
"""
|
||||
Initialize the secure completion client.
|
||||
|
||||
|
|
@ -84,6 +84,9 @@ class SecureCompletionClient:
|
|||
router_url: Base URL of the NOMYO Router (must use HTTPS for production)
|
||||
allow_http: Allow HTTP connections (ONLY for local development, never in production)
|
||||
secure_memory: Whether to use secure memory operations for this instance.
|
||||
max_retries: Number of retries on retryable errors (429, 500, 502, 503, 504,
|
||||
network errors). Uses exponential backoff. Default 2, matching
|
||||
the OpenAI Python SDK default.
|
||||
"""
|
||||
self.router_url = router_url.rstrip('/')
|
||||
self.private_key = None
|
||||
|
|
@ -91,6 +94,7 @@ class SecureCompletionClient:
|
|||
self.key_size = 4096 # RSA key size
|
||||
self.allow_http = allow_http # Store for use in fetch_server_public_key
|
||||
self._use_secure_memory = _SECURE_MEMORY_AVAILABLE and secure_memory
|
||||
self.max_retries = max_retries
|
||||
|
||||
# Validate HTTPS for security
|
||||
if not self.router_url.startswith("https://"):
|
||||
|
|
@ -659,6 +663,15 @@ class SecureCompletionClient:
|
|||
url = f"{self.router_url}/v1/chat/secure_completion"
|
||||
logger.debug("Target URL: %s", url)
|
||||
|
||||
_RETRYABLE_STATUS_CODES = {429, 500, 502, 503, 504}
|
||||
last_exc: Exception = APIConnectionError("Request failed")
|
||||
|
||||
for attempt in range(self.max_retries + 1):
|
||||
if attempt > 0:
|
||||
delay = 2 ** (attempt - 1) # 1s, 2s, 4s, …
|
||||
logger.warning("Retrying request (attempt %d/%d) after %.1fs...", attempt, self.max_retries, delay)
|
||||
await asyncio.sleep(delay)
|
||||
|
||||
try:
|
||||
async with httpx.AsyncClient(timeout=60.0) as client:
|
||||
response = await client.post(
|
||||
|
|
@ -676,7 +689,6 @@ class SecureCompletionClient:
|
|||
return decrypted_response
|
||||
|
||||
elif response.status_code == 400:
|
||||
# Bad request
|
||||
try:
|
||||
error = response.json()
|
||||
raise InvalidRequestError(
|
||||
|
|
@ -688,7 +700,6 @@ class SecureCompletionClient:
|
|||
raise InvalidRequestError("Bad request: Invalid response format")
|
||||
|
||||
elif response.status_code == 401:
|
||||
# Unauthorized - authentication failed
|
||||
try:
|
||||
error = response.json()
|
||||
error_message = error.get('detail', 'Invalid API key or authentication failed')
|
||||
|
|
@ -701,7 +712,6 @@ class SecureCompletionClient:
|
|||
raise AuthenticationError("Invalid API key or authentication failed")
|
||||
|
||||
elif response.status_code == 403:
|
||||
# Forbidden - model not allowed for security tier
|
||||
try:
|
||||
error = response.json()
|
||||
raise ForbiddenError(
|
||||
|
|
@ -713,7 +723,6 @@ class SecureCompletionClient:
|
|||
raise ForbiddenError("Forbidden: Model not allowed for the requested security tier")
|
||||
|
||||
elif response.status_code == 404:
|
||||
# Endpoint not found
|
||||
try:
|
||||
error = response.json()
|
||||
raise APIError(
|
||||
|
|
@ -724,44 +733,47 @@ class SecureCompletionClient:
|
|||
except (json.JSONDecodeError, ValueError):
|
||||
raise APIError("Endpoint not found: Secure inference not enabled")
|
||||
|
||||
elif response.status_code == 429:
|
||||
# Rate limit exceeded
|
||||
elif response.status_code in _RETRYABLE_STATUS_CODES:
|
||||
try:
|
||||
error = response.json()
|
||||
raise RateLimitError(
|
||||
f"Rate limit exceeded: {error.get('detail', 'Too many requests')}",
|
||||
if not isinstance(error, dict):
|
||||
error = {"detail": "unknown"}
|
||||
detail_msg = error.get("detail", "unknown")
|
||||
except (json.JSONDecodeError, ValueError):
|
||||
error = {}
|
||||
detail_msg = "unknown"
|
||||
|
||||
if response.status_code == 429:
|
||||
last_exc = RateLimitError(
|
||||
f"Rate limit exceeded: {detail_msg}",
|
||||
status_code=429,
|
||||
error_details=error
|
||||
)
|
||||
except (json.JSONDecodeError, ValueError):
|
||||
raise RateLimitError("Rate limit exceeded: Too many requests")
|
||||
|
||||
elif response.status_code == 500:
|
||||
# Server error
|
||||
try:
|
||||
error = response.json()
|
||||
raise ServerError(
|
||||
f"Server error: {error.get('detail', 'Internal server error')}",
|
||||
last_exc = ServerError(
|
||||
f"Server error: {detail_msg}",
|
||||
status_code=500,
|
||||
error_details=error
|
||||
)
|
||||
except (json.JSONDecodeError, ValueError):
|
||||
raise ServerError("Server error: Internal server error")
|
||||
|
||||
elif response.status_code == 503:
|
||||
# Service unavailable - inference backend is down
|
||||
try:
|
||||
error = response.json()
|
||||
raise ServiceUnavailableError(
|
||||
f"Service unavailable: {error.get('detail', 'Inference backend is unavailable')}",
|
||||
last_exc = ServiceUnavailableError(
|
||||
f"Service unavailable: {detail_msg}",
|
||||
status_code=503,
|
||||
error_details=error
|
||||
)
|
||||
except (json.JSONDecodeError, ValueError):
|
||||
raise ServiceUnavailableError("Service unavailable: Inference backend is unavailable")
|
||||
else:
|
||||
last_exc = APIError(
|
||||
f"Unexpected status code: {response.status_code} {detail_msg}",
|
||||
status_code=response.status_code,
|
||||
error_details=error
|
||||
)
|
||||
|
||||
if attempt < self.max_retries:
|
||||
logger.warning("Got retryable status %d: %s", response.status_code, detail_msg)
|
||||
continue
|
||||
raise last_exc
|
||||
|
||||
else:
|
||||
# Unexpected status code
|
||||
try:
|
||||
unexp_detail = response.json()
|
||||
if not isinstance(unexp_detail, dict):
|
||||
|
|
@ -775,9 +787,13 @@ class SecureCompletionClient:
|
|||
)
|
||||
|
||||
except httpx.NetworkError as e:
|
||||
raise APIConnectionError(f"Failed to connect to router: {e}")
|
||||
last_exc = APIConnectionError(f"Failed to connect to router: {e}")
|
||||
if attempt < self.max_retries:
|
||||
logger.warning("Network error on attempt %d: %s", attempt, e)
|
||||
continue
|
||||
raise last_exc
|
||||
except (SecurityError, APIError, AuthenticationError, InvalidRequestError, ForbiddenError, RateLimitError, ServerError, ServiceUnavailableError, APIConnectionError):
|
||||
raise # Re-raise known exceptions
|
||||
raise # Non-retryable — propagate immediately
|
||||
except Exception:
|
||||
logger.exception("Unexpected error in send_secure_request")
|
||||
raise APIConnectionError("Request failed due to an unexpected error")
|
||||
|
|
|
|||
|
|
@ -51,6 +51,6 @@ try:
|
|||
except ImportError:
|
||||
pass
|
||||
|
||||
__version__ = "0.2.5"
|
||||
__version__ = "0.2.6"
|
||||
__author__ = "NOMYO AI"
|
||||
__license__ = "Apache-2.0"
|
||||
|
|
|
|||
|
|
@ -52,7 +52,7 @@ class SecureChatCompletion:
|
|||
```
|
||||
"""
|
||||
|
||||
def __init__(self, base_url: str = "https://api.nomyo.ai", allow_http: bool = False, api_key: Optional[str] = None, secure_memory: bool = True, key_dir: Optional[str] = None):
|
||||
def __init__(self, base_url: str = "https://api.nomyo.ai", allow_http: bool = False, api_key: Optional[str] = None, secure_memory: bool = True, key_dir: Optional[str] = None, max_retries: int = 2):
|
||||
"""
|
||||
Initialize the secure chat completion client.
|
||||
|
||||
|
|
@ -68,8 +68,10 @@ class SecureChatCompletion:
|
|||
Set to False for testing or when security is not required.
|
||||
key_dir: Directory to load/save RSA keys. If None, ephemeral keys are
|
||||
generated in memory for this session only.
|
||||
max_retries: Number of retries on retryable errors (429, 500, 502, 503, 504,
|
||||
network errors). Uses exponential backoff. Default 2.
|
||||
"""
|
||||
self.client = SecureCompletionClient(router_url=base_url, allow_http=allow_http, secure_memory=secure_memory)
|
||||
self.client = SecureCompletionClient(router_url=base_url, allow_http=allow_http, secure_memory=secure_memory, max_retries=max_retries)
|
||||
self._keys_initialized = False
|
||||
self._keys_lock = asyncio.Lock()
|
||||
self.api_key = api_key
|
||||
|
|
|
|||
|
|
@ -4,7 +4,7 @@ build-backend = "hatchling.build"
|
|||
|
||||
[project]
|
||||
name = "nomyo"
|
||||
version = "0.2.5"
|
||||
version = "0.2.6"
|
||||
description = "OpenAI-compatible secure chat client with end-to-end encryption for NOMYO Inference Endpoints"
|
||||
authors = [
|
||||
{name = "NOMYO.AI", email = "ichi@nomyo.ai"},
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue