diff --git a/README.md b/README.md index 1846d2e..c10b898 100644 --- a/README.md +++ b/README.md @@ -349,7 +349,8 @@ SecureChatCompletion( base_url: str = "https://api.nomyo.ai", allow_http: bool = False, api_key: Optional[str] = None, - secure_memory: bool = True + secure_memory: bool = True, + max_retries: int = 2 ) ``` @@ -359,6 +360,7 @@ SecureChatCompletion( - `allow_http`: Allow HTTP connections (ONLY for local development, never in production) - `api_key`: Optional API key for bearer authentication - `secure_memory`: Enable secure memory protection (default: True) +- `max_retries`: Retries on retryable errors (429, 500, 502, 503, 504, network errors) with exponential backoff. Default: 2 #### Methods @@ -370,7 +372,7 @@ SecureChatCompletion( #### Constructor ```python -SecureCompletionClient(router_url: str = "https://api.nomyo.ai") +SecureCompletionClient(router_url: str = "https://api.nomyo.ai", allow_http: bool = False, max_retries: int = 2) ``` #### Methods diff --git a/doc/api-reference.md b/doc/api-reference.md index 439e471..363c865 100644 --- a/doc/api-reference.md +++ b/doc/api-reference.md @@ -11,7 +11,8 @@ SecureChatCompletion( base_url: str = "https://api.nomyo.ai", allow_http: bool = False, api_key: Optional[str] = None, - secure_memory: bool = True + secure_memory: bool = True, + max_retries: int = 2 ) ``` @@ -21,6 +22,7 @@ SecureChatCompletion( - `allow_http` (bool): Allow HTTP connections (ONLY for local development, never in production) - `api_key` (Optional[str]): Optional API key for bearer authentication - `secure_memory` (bool): Enable secure memory protection (default: True) +- `max_retries` (int): Number of retries on retryable errors (429, 500, 502, 503, 504, network errors). Uses exponential backoff. Default: 2 ### Methods @@ -92,13 +94,18 @@ The `SecureCompletionClient` class handles the underlying encryption, key manage ### Constructor ```python -SecureCompletionClient(router_url: str = "https://api.nomyo.ai", allow_http: bool = False) +SecureCompletionClient( + router_url: str = "https://api.nomyo.ai", + allow_http: bool = False, + max_retries: int = 2 +) ``` **Parameters:** - `router_url` (str): Base URL of the NOMYO Router (must use HTTPS for production) - `allow_http` (bool): Allow HTTP connections (ONLY for local development, never in production) +- `max_retries` (int): Number of retries on retryable errors (429, 500, 502, 503, 504, network errors). Uses exponential backoff. Default: 2 ### Methods diff --git a/doc/rate-limits.md b/doc/rate-limits.md index 7d9f85c..2c18da2 100644 --- a/doc/rate-limits.md +++ b/doc/rate-limits.md @@ -48,20 +48,14 @@ HTTP/1.1 503 Service Unavailable - **Implement exponential backoff** when you receive a `429` response. Start with a short delay (e.g. 500 ms) and double it on each subsequent failure, up to a reasonable maximum. - **Monitor for `503` responses** — repeated occurrences indicate that your usage pattern is triggering the abuse threshold. Refactor your request logic before the cool-down expires. -## Example: Exponential Backoff +## Retry Behaviour + +The client retries automatically on `429`, `500`, `502`, `503`, `504`, and network errors using exponential backoff (1 s, 2 s, …). The default is **2 retries**. You can raise or disable this per client: ```python -import asyncio -import httpx +# More retries for high-throughput workloads +client = SecureChatCompletion(api_key="...", max_retries=5) -async def request_with_backoff(client, *args, max_retries=5, **kwargs): - delay = 0.5 - for attempt in range(max_retries): - response = await client.create(*args, **kwargs) - if response.status_code == 429: - await asyncio.sleep(delay) - delay = min(delay * 2, 30) - continue - return response - raise RuntimeError("Rate limit exceeded after maximum retries") +# Disable retries entirely +client = SecureChatCompletion(api_key="...", max_retries=0) ``` diff --git a/nomyo/SecureCompletionClient.py b/nomyo/SecureCompletionClient.py index ee81942..4c1d96d 100644 --- a/nomyo/SecureCompletionClient.py +++ b/nomyo/SecureCompletionClient.py @@ -1,4 +1,4 @@ -import ctypes, json, base64, urllib.parse, httpx, os, secrets, sys, warnings, logging +import asyncio, ctypes, json, base64, urllib.parse, httpx, os, secrets, sys, warnings, logging from typing import Dict, Any, Optional from cryptography.hazmat.primitives import serialization, hashes from cryptography.hazmat.primitives.asymmetric import rsa, padding @@ -76,7 +76,7 @@ class SecureCompletionClient: - Response parsing """ - def __init__(self, router_url: str = "https://api.nomyo.ai", allow_http: bool = False, secure_memory: bool = True): + def __init__(self, router_url: str = "https://api.nomyo.ai", allow_http: bool = False, secure_memory: bool = True, max_retries: int = 2): """ Initialize the secure completion client. @@ -84,6 +84,9 @@ class SecureCompletionClient: router_url: Base URL of the NOMYO Router (must use HTTPS for production) allow_http: Allow HTTP connections (ONLY for local development, never in production) secure_memory: Whether to use secure memory operations for this instance. + max_retries: Number of retries on retryable errors (429, 500, 502, 503, 504, + network errors). Uses exponential backoff. Default 2, matching + the OpenAI Python SDK default. """ self.router_url = router_url.rstrip('/') self.private_key = None @@ -91,6 +94,7 @@ class SecureCompletionClient: self.key_size = 4096 # RSA key size self.allow_http = allow_http # Store for use in fetch_server_public_key self._use_secure_memory = _SECURE_MEMORY_AVAILABLE and secure_memory + self.max_retries = max_retries # Validate HTTPS for security if not self.router_url.startswith("https://"): @@ -659,13 +663,22 @@ class SecureCompletionClient: url = f"{self.router_url}/v1/chat/secure_completion" logger.debug("Target URL: %s", url) - try: - async with httpx.AsyncClient(timeout=60.0) as client: - response = await client.post( - url, - headers=headers, - content=encrypted_payload - ) + _RETRYABLE_STATUS_CODES = {429, 500, 502, 503, 504} + last_exc: Exception = APIConnectionError("Request failed") + + for attempt in range(self.max_retries + 1): + if attempt > 0: + delay = 2 ** (attempt - 1) # 1s, 2s, 4s, … + logger.warning("Retrying request (attempt %d/%d) after %.1fs...", attempt, self.max_retries, delay) + await asyncio.sleep(delay) + + try: + async with httpx.AsyncClient(timeout=60.0) as client: + response = await client.post( + url, + headers=headers, + content=encrypted_payload + ) logger.debug("HTTP Status: %d", response.status_code) @@ -676,7 +689,6 @@ class SecureCompletionClient: return decrypted_response elif response.status_code == 400: - # Bad request try: error = response.json() raise InvalidRequestError( @@ -688,7 +700,6 @@ class SecureCompletionClient: raise InvalidRequestError("Bad request: Invalid response format") elif response.status_code == 401: - # Unauthorized - authentication failed try: error = response.json() error_message = error.get('detail', 'Invalid API key or authentication failed') @@ -701,7 +712,6 @@ class SecureCompletionClient: raise AuthenticationError("Invalid API key or authentication failed") elif response.status_code == 403: - # Forbidden - model not allowed for security tier try: error = response.json() raise ForbiddenError( @@ -713,7 +723,6 @@ class SecureCompletionClient: raise ForbiddenError("Forbidden: Model not allowed for the requested security tier") elif response.status_code == 404: - # Endpoint not found try: error = response.json() raise APIError( @@ -724,44 +733,47 @@ class SecureCompletionClient: except (json.JSONDecodeError, ValueError): raise APIError("Endpoint not found: Secure inference not enabled") - elif response.status_code == 429: - # Rate limit exceeded + elif response.status_code in _RETRYABLE_STATUS_CODES: try: error = response.json() - raise RateLimitError( - f"Rate limit exceeded: {error.get('detail', 'Too many requests')}", + if not isinstance(error, dict): + error = {"detail": "unknown"} + detail_msg = error.get("detail", "unknown") + except (json.JSONDecodeError, ValueError): + error = {} + detail_msg = "unknown" + + if response.status_code == 429: + last_exc = RateLimitError( + f"Rate limit exceeded: {detail_msg}", status_code=429, error_details=error ) - except (json.JSONDecodeError, ValueError): - raise RateLimitError("Rate limit exceeded: Too many requests") - - elif response.status_code == 500: - # Server error - try: - error = response.json() - raise ServerError( - f"Server error: {error.get('detail', 'Internal server error')}", + elif response.status_code == 500: + last_exc = ServerError( + f"Server error: {detail_msg}", status_code=500, error_details=error ) - except (json.JSONDecodeError, ValueError): - raise ServerError("Server error: Internal server error") - - elif response.status_code == 503: - # Service unavailable - inference backend is down - try: - error = response.json() - raise ServiceUnavailableError( - f"Service unavailable: {error.get('detail', 'Inference backend is unavailable')}", + elif response.status_code == 503: + last_exc = ServiceUnavailableError( + f"Service unavailable: {detail_msg}", status_code=503, error_details=error ) - except (json.JSONDecodeError, ValueError): - raise ServiceUnavailableError("Service unavailable: Inference backend is unavailable") + else: + last_exc = APIError( + f"Unexpected status code: {response.status_code} {detail_msg}", + status_code=response.status_code, + error_details=error + ) + + if attempt < self.max_retries: + logger.warning("Got retryable status %d: %s", response.status_code, detail_msg) + continue + raise last_exc else: - # Unexpected status code try: unexp_detail = response.json() if not isinstance(unexp_detail, dict): @@ -774,13 +786,17 @@ class SecureCompletionClient: status_code=response.status_code ) - except httpx.NetworkError as e: - raise APIConnectionError(f"Failed to connect to router: {e}") - except (SecurityError, APIError, AuthenticationError, InvalidRequestError, ForbiddenError, RateLimitError, ServerError, ServiceUnavailableError, APIConnectionError): - raise # Re-raise known exceptions - except Exception: - logger.exception("Unexpected error in send_secure_request") - raise APIConnectionError("Request failed due to an unexpected error") + except httpx.NetworkError as e: + last_exc = APIConnectionError(f"Failed to connect to router: {e}") + if attempt < self.max_retries: + logger.warning("Network error on attempt %d: %s", attempt, e) + continue + raise last_exc + except (SecurityError, APIError, AuthenticationError, InvalidRequestError, ForbiddenError, RateLimitError, ServerError, ServiceUnavailableError, APIConnectionError): + raise # Non-retryable — propagate immediately + except Exception: + logger.exception("Unexpected error in send_secure_request") + raise APIConnectionError("Request failed due to an unexpected error") def _validate_rsa_key(self, key, key_type: str = "private") -> None: """ diff --git a/nomyo/__init__.py b/nomyo/__init__.py index 0a81157..5773045 100644 --- a/nomyo/__init__.py +++ b/nomyo/__init__.py @@ -51,6 +51,6 @@ try: except ImportError: pass -__version__ = "0.2.5" +__version__ = "0.2.6" __author__ = "NOMYO AI" __license__ = "Apache-2.0" diff --git a/nomyo/nomyo.py b/nomyo/nomyo.py index 682997b..95709b2 100644 --- a/nomyo/nomyo.py +++ b/nomyo/nomyo.py @@ -52,7 +52,7 @@ class SecureChatCompletion: ``` """ - def __init__(self, base_url: str = "https://api.nomyo.ai", allow_http: bool = False, api_key: Optional[str] = None, secure_memory: bool = True, key_dir: Optional[str] = None): + def __init__(self, base_url: str = "https://api.nomyo.ai", allow_http: bool = False, api_key: Optional[str] = None, secure_memory: bool = True, key_dir: Optional[str] = None, max_retries: int = 2): """ Initialize the secure chat completion client. @@ -68,8 +68,10 @@ class SecureChatCompletion: Set to False for testing or when security is not required. key_dir: Directory to load/save RSA keys. If None, ephemeral keys are generated in memory for this session only. + max_retries: Number of retries on retryable errors (429, 500, 502, 503, 504, + network errors). Uses exponential backoff. Default 2. """ - self.client = SecureCompletionClient(router_url=base_url, allow_http=allow_http, secure_memory=secure_memory) + self.client = SecureCompletionClient(router_url=base_url, allow_http=allow_http, secure_memory=secure_memory, max_retries=max_retries) self._keys_initialized = False self._keys_lock = asyncio.Lock() self.api_key = api_key diff --git a/pyproject.toml b/pyproject.toml index 681ba9a..5576a31 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -4,7 +4,7 @@ build-backend = "hatchling.build" [project] name = "nomyo" -version = "0.2.5" +version = "0.2.6" description = "OpenAI-compatible secure chat client with end-to-end encryption for NOMYO Inference Endpoints" authors = [ {name = "NOMYO.AI", email = "ichi@nomyo.ai"},