feat: add automatic client retry logic with exponential backoff

2026-04-15 12:08:21 +02:00 · 2026-04-15 12:08:21 +02:00 · 93adb6c45c
commit 93adb6c45c
parent 5040d181d2
7 changed files with 87 additions and 66 deletions
--- a/README.md
+++ b/README.md
@ -349,7 +349,8 @@ SecureChatCompletion(
    base_url: str = "https://api.nomyo.ai",
    allow_http: bool = False,
    api_key: Optional[str] = None,
-    secure_memory: bool = True
+    secure_memory: bool = True,
    max_retries: int = 2
 )
 ```
@ -359,6 +360,7 @@ SecureChatCompletion(
 - `allow_http`: Allow HTTP connections (ONLY for local development, never in production)
 - `api_key`: Optional API key for bearer authentication
 - `secure_memory`: Enable secure memory protection (default: True)
 - `max_retries`: Number of retries on retryable errors (429, 500, 502, 503, 504, network errors), using exponential backoff. Default: 2
 #### Methods
@ -370,7 +372,7 @@ SecureChatCompletion(
 #### Constructor
 ```python
-SecureCompletionClient(router_url: str = "https://api.nomyo.ai")
+SecureCompletionClient(router_url: str = "https://api.nomyo.ai", allow_http: bool = False, secure_memory: bool = True, max_retries: int = 2)
 ```
 #### Methods
--- a/doc/api-reference.md
+++ b/doc/api-reference.md
@ -11,7 +11,8 @@ SecureChatCompletion(
    base_url: str = "https://api.nomyo.ai",
    allow_http: bool = False,
    api_key: Optional[str] = None,
-    secure_memory: bool = True
+    secure_memory: bool = True,
    max_retries: int = 2
 )
 ```
@ -21,6 +22,7 @@ SecureChatCompletion(
 - `allow_http` (bool): Allow HTTP connections (ONLY for local development, never in production)
 - `api_key` (Optional[str]): Optional API key for bearer authentication
 - `secure_memory` (bool): Enable secure memory protection (default: True)
 - `max_retries` (int): Number of retries on retryable errors (429, 500, 502, 503, 504, network errors). Uses exponential backoff. Default: 2
 ### Methods
@ -92,13 +94,18 @@ The `SecureCompletionClient` class handles the underlying encryption, key manage
 ### Constructor
 ```python
-SecureCompletionClient(router_url: str = "https://api.nomyo.ai", allow_http: bool = False)
+SecureCompletionClient(
    router_url: str = "https://api.nomyo.ai",
    allow_http: bool = False,
    secure_memory: bool = True,
    max_retries: int = 2
 )
 ```
 **Parameters:**
 - `router_url` (str): Base URL of the NOMYO Router (must use HTTPS for production)
 - `allow_http` (bool): Allow HTTP connections (ONLY for local development, never in production)
 - `secure_memory` (bool): Whether to use secure memory operations for this instance (default: True)
 - `max_retries` (int): Number of retries on retryable errors (429, 500, 502, 503, 504, network errors). Uses exponential backoff. Default: 2
 ### Methods
--- a/doc/rate-limits.md
+++ b/doc/rate-limits.md
@ -48,20 +48,14 @@ HTTP/1.1 503 Service Unavailable
 - **Use exponential backoff** when you receive a `429` response. The client does this automatically (see Retry Behaviour below); if you add your own retry layer on top, start with a short delay (e.g. 500 ms) and double it on each subsequent failure, up to a reasonable maximum.
 - **Monitor for `503` responses** — repeated occurrences indicate that your usage pattern is triggering the abuse threshold. Refactor your request logic before the cool-down expires.
-## Example: Exponential Backoff
+## Retry Behaviour
 The client retries automatically on `429`, `500`, `502`, `503`, `504`, and network errors using exponential backoff (1 s, 2 s, …). The default is **2 retries**. You can raise or disable this per client:
 ```python
-import asyncio
+# More retries for high-throughput workloads
-import httpx
+client = SecureChatCompletion(api_key="...", max_retries=5)
-async def request_with_backoff(client, *args, max_retries=5, **kwargs):
+# Disable retries entirely
-    delay = 0.5
+client = SecureChatCompletion(api_key="...", max_retries=0)
    for attempt in range(max_retries):
        response = await client.create(*args, **kwargs)
        if response.status_code == 429:
            await asyncio.sleep(delay)
            delay = min(delay * 2, 30)
            continue
        return response
    raise RuntimeError("Rate limit exceeded after maximum retries")
 ```
--- a/nomyo/SecureCompletionClient.py
+++ b/nomyo/SecureCompletionClient.py
@ -1,4 +1,4 @@
-import ctypes, json, base64, urllib.parse, httpx, os, secrets, sys, warnings, logging
+import asyncio, ctypes, json, base64, urllib.parse, httpx, os, secrets, sys, warnings, logging
 from typing import Dict, Any, Optional
 from cryptography.hazmat.primitives import serialization, hashes
 from cryptography.hazmat.primitives.asymmetric import rsa, padding
@ -76,7 +76,7 @@ class SecureCompletionClient:
    - Response parsing
    """
-    def __init__(self, router_url: str = "https://api.nomyo.ai", allow_http: bool = False, secure_memory: bool = True):
+    def __init__(self, router_url: str = "https://api.nomyo.ai", allow_http: bool = False, secure_memory: bool = True, max_retries: int = 2):
        """
        Initialize the secure completion client.
@ -84,6 +84,9 @@ class SecureCompletionClient:
            router_url: Base URL of the NOMYO Router (must use HTTPS for production)
            allow_http: Allow HTTP connections (ONLY for local development, never in production)
            secure_memory: Whether to use secure memory operations for this instance.
            max_retries: Number of retries on retryable errors (429, 500, 502, 503, 504,
                         network errors). Uses exponential backoff. Default 2, matching
                         the OpenAI Python SDK default.
        """
        self.router_url = router_url.rstrip('/')
        self.private_key = None
@ -91,6 +94,7 @@ class SecureCompletionClient:
        self.key_size = 4096  # RSA key size
        self.allow_http = allow_http  # Store for use in fetch_server_public_key
        self._use_secure_memory = _SECURE_MEMORY_AVAILABLE and secure_memory
        self.max_retries = max_retries
        # Validate HTTPS for security
        if not self.router_url.startswith("https://"):
@ -659,6 +663,15 @@ class SecureCompletionClient:
        url = f"{self.router_url}/v1/chat/secure_completion"
        logger.debug("Target URL: %s", url)
        _RETRYABLE_STATUS_CODES = {429, 500, 502, 503, 504}
        last_exc: Exception = APIConnectionError("Request failed")
        for attempt in range(self.max_retries + 1):
            if attempt > 0:
                delay = 2 ** (attempt - 1)  # 1s, 2s, 4s, …
                logger.warning("Retrying request (attempt %d/%d) after %.1fs...", attempt, self.max_retries, delay)
                await asyncio.sleep(delay)
            try:
                async with httpx.AsyncClient(timeout=60.0) as client:
                    response = await client.post(
@ -676,7 +689,6 @@ class SecureCompletionClient:
                    return decrypted_response
                elif response.status_code == 400:
                    # Bad request
                    try:
                        error = response.json()
                        raise InvalidRequestError(
@ -688,7 +700,6 @@ class SecureCompletionClient:
                        raise InvalidRequestError("Bad request: Invalid response format")
                elif response.status_code == 401:
                    # Unauthorized - authentication failed
                    try:
                        error = response.json()
                        error_message = error.get('detail', 'Invalid API key or authentication failed')
@ -701,7 +712,6 @@ class SecureCompletionClient:
                        raise AuthenticationError("Invalid API key or authentication failed")
                elif response.status_code == 403:
                    # Forbidden - model not allowed for security tier
                    try:
                        error = response.json()
                        raise ForbiddenError(
@ -713,7 +723,6 @@ class SecureCompletionClient:
                        raise ForbiddenError("Forbidden: Model not allowed for the requested security tier")
                elif response.status_code == 404:
                    # Endpoint not found
                    try:
                        error = response.json()
                        raise APIError(
@ -724,44 +733,47 @@ class SecureCompletionClient:
                    except (json.JSONDecodeError, ValueError):
                        raise APIError("Endpoint not found: Secure inference not enabled")
-                elif response.status_code == 429:
+                elif response.status_code in _RETRYABLE_STATUS_CODES:
                    # Retryable status: 429 (rate limit) or 500/502/503/504 (server-side failure)
                    try:
                        error = response.json()
-                        raise RateLimitError(
+                        if not isinstance(error, dict):
-                            f"Rate limit exceeded: {error.get('detail', 'Too many requests')}",
+                            error = {"detail": "unknown"}
                        detail_msg = error.get("detail", "unknown")
                    except (json.JSONDecodeError, ValueError):
                        error = {}
                        detail_msg = "unknown"
                    if response.status_code == 429:
                        last_exc = RateLimitError(
                            f"Rate limit exceeded: {detail_msg}",
                            status_code=429,
                            error_details=error
                        )
                    except (json.JSONDecodeError, ValueError):
                        raise RateLimitError("Rate limit exceeded: Too many requests")
                    elif response.status_code == 500:
-                    # Server error
+                        last_exc = ServerError(
-                    try:
+                            f"Server error: {detail_msg}",
                        error = response.json()
                        raise ServerError(
                            f"Server error: {error.get('detail', 'Internal server error')}",
                            status_code=500,
                            error_details=error
                        )
                    except (json.JSONDecodeError, ValueError):
                        raise ServerError("Server error: Internal server error")
                    elif response.status_code == 503:
-                    # Service unavailable - inference backend is down
+                        last_exc = ServiceUnavailableError(
-                    try:
+                            f"Service unavailable: {detail_msg}",
                        error = response.json()
                        raise ServiceUnavailableError(
                            f"Service unavailable: {error.get('detail', 'Inference backend is unavailable')}",
                            status_code=503,
                            error_details=error
                        )
-                    except (json.JSONDecodeError, ValueError):
+                    else:
-                        raise ServiceUnavailableError("Service unavailable: Inference backend is unavailable")
+                        last_exc = APIError(
                            f"Unexpected status code: {response.status_code} {detail_msg}",
                            status_code=response.status_code,
                            error_details=error
                        )
                    if attempt < self.max_retries:
                        logger.warning("Got retryable status %d: %s", response.status_code, detail_msg)
                        continue
                    raise last_exc
                else:
                    # Unexpected status code
                    try:
                        unexp_detail = response.json()
                        if not isinstance(unexp_detail, dict):
@ -775,9 +787,13 @@ class SecureCompletionClient:
                    )
            except httpx.NetworkError as e:
-            raise APIConnectionError(f"Failed to connect to router: {e}")
+                last_exc = APIConnectionError(f"Failed to connect to router: {e}")
                if attempt < self.max_retries:
                    logger.warning("Network error on attempt %d: %s", attempt, e)
                    continue
                raise last_exc
            except (SecurityError, APIError, AuthenticationError, InvalidRequestError, ForbiddenError, RateLimitError, ServerError, ServiceUnavailableError, APIConnectionError):
-            raise  # Re-raise known exceptions
+                raise  # Non-retryable — propagate immediately
            except Exception:
                logger.exception("Unexpected error in send_secure_request")
                raise APIConnectionError("Request failed due to an unexpected error")
--- a/nomyo/init.py
+++ b/nomyo/init.py
@ -51,6 +51,6 @@ try:
 except ImportError:
    pass
-__version__ = "0.2.5"
+__version__ = "0.2.6"
 __author__ = "NOMYO AI"
 __license__ = "Apache-2.0"
--- a/nomyo/nomyo.py
+++ b/nomyo/nomyo.py
@ -52,7 +52,7 @@ class SecureChatCompletion:
        ```
    """
-    def __init__(self, base_url: str = "https://api.nomyo.ai", allow_http: bool = False, api_key: Optional[str] = None, secure_memory: bool = True, key_dir: Optional[str] = None):
+    def __init__(self, base_url: str = "https://api.nomyo.ai", allow_http: bool = False, api_key: Optional[str] = None, secure_memory: bool = True, key_dir: Optional[str] = None, max_retries: int = 2):
        """
        Initialize the secure chat completion client.
@ -68,8 +68,10 @@ class SecureChatCompletion:
                          Set to False for testing or when security is not required.
            key_dir: Directory to load/save RSA keys. If None, ephemeral keys are
                     generated in memory for this session only.
            max_retries: Number of retries on retryable errors (429, 500, 502, 503, 504,
                        network errors). Uses exponential backoff. Default 2.
        """
-        self.client = SecureCompletionClient(router_url=base_url, allow_http=allow_http, secure_memory=secure_memory)
+        self.client = SecureCompletionClient(router_url=base_url, allow_http=allow_http, secure_memory=secure_memory, max_retries=max_retries)
        self._keys_initialized = False
        self._keys_lock = asyncio.Lock()
        self.api_key = api_key
--- a/pyproject.toml
+++ b/pyproject.toml
@ -4,7 +4,7 @@ build-backend = "hatchling.build"
 [project]
 name = "nomyo"
-version = "0.2.5"
+version = "0.2.6"
 description = "OpenAI-compatible secure chat client with end-to-end encryption for NOMYO Inference Endpoints"
 authors = [
    {name = "NOMYO.AI", email = "ichi@nomyo.ai"},