diff --git a/.forgejo/workflows/publish.yml b/.forgejo/workflows/publish.yml index 9fdee68..17f700c 100644 --- a/.forgejo/workflows/publish.yml +++ b/.forgejo/workflows/publish.yml @@ -7,10 +7,16 @@ on: workflow_dispatch: jobs: - publish: - runs-on: docker-amd64 + build-and-publish: + name: Build & Publish (${{ matrix.runner }}, py${{ matrix.python }}) + runs-on: ${{ matrix.runner }} container: - image: python:3.12-bookworm + image: python:${{ matrix.python }}-bookworm + + strategy: + matrix: + python: ["3.10", "3.11", "3.12"] + runner: [docker-amd64, docker-arm64] steps: - name: Checkout repository @@ -20,13 +26,21 @@ jobs: . - name: Install build tools - run: pip install build twine + run: | + apt-get update -qq && apt-get install -y patchelf + pip install build Cython twine auditwheel - - name: Build package - run: python -m build + - name: Build wheel + run: python -m build --wheel + + - name: Repair wheel to manylinux + run: auditwheel repair dist/*.whl --wheel-dir wheelhouse/ + + - name: Check wheel metadata + run: twine check wheelhouse/*.whl - name: Publish to PyPI env: TWINE_USERNAME: __token__ TWINE_PASSWORD: ${{ secrets.PYPI_API_TOKEN }} - run: twine upload dist/* + run: twine upload --verbose wheelhouse/*.whl diff --git a/README.md b/README.md index 32c6c81..1846d2e 100644 --- a/README.md +++ b/README.md @@ -10,17 +10,6 @@ ## 🚀 Quick Start -### 0. Try It Now (Demo Credentials) - -No account needed — use these public demo credentials to test immediately: - -| | | -|---|---| -| **API key** | `NOMYO_AI_E2EE_INFERENCE` | -| **Model** | `Qwen/Qwen3-0.6B` | - -> **Note:** The demo endpoint uses a fixed 256-token context window and is intended for evaluation only. - ### 1. 
Install methods via pip (recommended): @@ -360,8 +349,7 @@ SecureChatCompletion( base_url: str = "https://api.nomyo.ai", allow_http: bool = False, api_key: Optional[str] = None, - secure_memory: bool = True, - max_retries: int = 2 + secure_memory: bool = True ) ``` @@ -371,7 +359,6 @@ SecureChatCompletion( - `allow_http`: Allow HTTP connections (ONLY for local development, never in production) - `api_key`: Optional API key for bearer authentication - `secure_memory`: Enable secure memory protection (default: True) -- `max_retries`: Retries on retryable errors (429, 500, 502, 503, 504, network errors) with exponential backoff. Default: 2 #### Methods @@ -383,7 +370,7 @@ SecureChatCompletion( #### Constructor ```python -SecureCompletionClient(router_url: str = "https://api.nomyo.ai", allow_http: bool = False, max_retries: int = 2) +SecureCompletionClient(router_url: str = "https://api.nomyo.ai") ``` #### Methods diff --git a/doc/api-reference.md b/doc/api-reference.md index 1069082..439e471 100644 --- a/doc/api-reference.md +++ b/doc/api-reference.md @@ -11,8 +11,7 @@ SecureChatCompletion( base_url: str = "https://api.nomyo.ai", allow_http: bool = False, api_key: Optional[str] = None, - secure_memory: bool = True, - max_retries: int = 2 + secure_memory: bool = True ) ``` @@ -22,7 +21,6 @@ SecureChatCompletion( - `allow_http` (bool): Allow HTTP connections (ONLY for local development, never in production) - `api_key` (Optional[str]): Optional API key for bearer authentication - `secure_memory` (bool): Enable secure memory protection (default: True) -- `max_retries` (int): Number of retries on retryable errors (429, 500, 502, 503, 504, network errors). Uses exponential backoff. 
Default: 2 ### Methods @@ -75,30 +73,10 @@ A dictionary containing the chat completion response with the following structur "prompt_tokens": int, "completion_tokens": int, "total_tokens": int - }, - "_metadata": { - "payload_id": str, - "processed_at": int, # Unix timestamp - "is_encrypted": bool, - "response_status": str, - "security_tier": str, # "standard", "high", or "maximum" - "memory_protection": dict, # server-side memory protection info - "cuda_device": dict, # privacy-safe GPU info (hashed identifiers) - "tpm_attestation": { # TPM 2.0 hardware attestation (see Security Guide) - "is_available": bool, - # Present only when is_available is True: - "pcr_banks": str, # e.g. "sha256:0,7,10" - "pcr_values": dict, # {bank: {pcr_index: hex_digest}} - "quote_b64": str, # base64-encoded TPMS_ATTEST (signed by AIK) - "signature_b64": str, # base64-encoded TPMT_SIGNATURE - "aik_pubkey_b64": str, # base64-encoded TPM2B_PUBLIC (ephemeral AIK) - } } } ``` -The `_metadata` field is added by the client library and is not part of the OpenAI API response format. See the [Security Guide](security-guide.md) for how to interpret and verify `tpm_attestation`. - #### acreate(model, messages, **kwargs) Async alias for create() method. @@ -114,18 +92,13 @@ The `SecureCompletionClient` class handles the underlying encryption, key manage ### Constructor ```python -SecureCompletionClient( - router_url: str = "https://api.nomyo.ai", - allow_http: bool = False, - max_retries: int = 2 -) +SecureCompletionClient(router_url: str = "https://api.nomyo.ai", allow_http: bool = False) ``` **Parameters:** - `router_url` (str): Base URL of the NOMYO Router (must use HTTPS for production) - `allow_http` (bool): Allow HTTP connections (ONLY for local development, never in production) -- `max_retries` (int): Number of retries on retryable errors (429, 500, 502, 503, 504, network errors). Uses exponential backoff. 
Default: 2 ### Methods diff --git a/doc/getting-started.md b/doc/getting-started.md index 1cf78a4..4ccdf82 100644 --- a/doc/getting-started.md +++ b/doc/getting-started.md @@ -1,33 +1,5 @@ # Getting Started -## Try It Now (Demo Credentials) - -You can test the client immediately using these public demo credentials — no sign-up required: - -| | | -|---|---| -| **API key** | `NOMYO_AI_E2EE_INFERENCE` | -| **Model** | `Qwen/Qwen3-0.6B` | - -> **Note:** The demo endpoint uses a fixed 256-token context window and is intended for evaluation only. - -```python -import asyncio -from nomyo import SecureChatCompletion - -async def main(): - client = SecureChatCompletion(api_key="NOMYO_AI_E2EE_INFERENCE") - - response = await client.create( - model="Qwen/Qwen3-0.6B", - messages=[{"role": "user", "content": "Hello!"}] - ) - - print(response['choices'][0]['message']['content']) - -asyncio.run(main()) -``` - ## Basic Usage The NOMYO client provides end-to-end encryption (E2E) for all communications between your application and the NOMYO inference endpoints. This ensures that your prompts and responses are protected from unauthorized access or interception. diff --git a/doc/rate-limits.md b/doc/rate-limits.md index 2c18da2..7d9f85c 100644 --- a/doc/rate-limits.md +++ b/doc/rate-limits.md @@ -48,14 +48,20 @@ HTTP/1.1 503 Service Unavailable - **Implement exponential backoff** when you receive a `429` response. Start with a short delay (e.g. 500 ms) and double it on each subsequent failure, up to a reasonable maximum. - **Monitor for `503` responses** — repeated occurrences indicate that your usage pattern is triggering the abuse threshold. Refactor your request logic before the cool-down expires. -## Retry Behaviour - -The client retries automatically on `429`, `500`, `502`, `503`, `504`, and network errors using exponential backoff (1 s, 2 s, …). The default is **2 retries**. 
You can raise or disable this per client: +## Example: Exponential Backoff ```python -# More retries for high-throughput workloads -client = SecureChatCompletion(api_key="...", max_retries=5) +import asyncio +from nomyo import RateLimitError -# Disable retries entirely -client = SecureChatCompletion(api_key="...", max_retries=0) +async def request_with_backoff(client, *args, max_retries=5, **kwargs): + delay = 0.5 + for attempt in range(max_retries): + try: + # create() returns the response dict; a 429 is raised as RateLimitError + return await client.create(*args, **kwargs) + except RateLimitError: + await asyncio.sleep(delay) + delay = min(delay * 2, 30) + raise RuntimeError("Rate limit exceeded after maximum retries") ``` diff --git a/doc/security-guide.md b/doc/security-guide.md index 6e4abdc..6c34f71 100644 --- a/doc/security-guide.md +++ b/doc/security-guide.md @@ -162,81 +162,6 @@ Secure memory features: - Guarantees zeroing of sensitive memory - Prevents memory dumps from containing sensitive data -## Hardware Attestation (TPM 2.0) - -### What it is - -When the server has a TPM 2.0 chip, every response includes a `tpm_attestation` block in `_metadata`. This is a cryptographically signed hardware quote proving: - -- Which firmware and Secure Boot state the server is running (PCR 0, 7) -- Which application binary is running, when IMA is active (PCR 10) - -The quote is signed by an ephemeral AIK (Attestation Identity Key) generated fresh for each request and tied to the `payload_id` nonce, so it cannot be replayed for a different request. - -### Reading the attestation - -```python -response = await client.create( - model="Qwen/Qwen3-0.6B", - messages=[{"role": "user", "content": "..."}], - security_tier="maximum" -) - -tpm = response["_metadata"].get("tpm_attestation", {}) - -if tpm.get("is_available"): - print("PCR banks:", tpm["pcr_banks"]) # e.g. 
"sha256:0,7,10" - print("PCR values:", tpm["pcr_values"]) # {bank: {index: hex}} - print("AIK key:", tpm["aik_pubkey_b64"][:32], "...") -else: - print("TPM not available on this server") -``` - -### Verifying the quote - -The response is self-contained: `aik_pubkey_b64` is the full public key of the AIK that signed the quote, so no separate key-fetch round-trip is needed. - -Verification steps using `tpm2-pytss`: - -```python -import base64 -from tpm2_pytss.types import TPM2B_PUBLIC, TPMT_SIGNATURE, TPM2B_ATTEST - -# 1. Decode the quote components -aik_pub = TPM2B_PUBLIC.unmarshal(base64.b64decode(tpm["aik_pubkey_b64"]))[0] -quote = TPM2B_ATTEST.unmarshal(base64.b64decode(tpm["quote_b64"]))[0] -sig = TPMT_SIGNATURE.unmarshal(base64.b64decode(tpm["signature_b64"]))[0] - -# 2. Verify the signature over the quote using the AIK public key -# (use a TPM ESAPI verify_signature call or an offline RSA verify) - -# 3. Inspect the qualifying_data inside the quote — it must match -# SHA-256(payload_id.encode())[:16] to confirm this quote is for this request - -# 4. Check pcr_values against your known-good baseline -``` - -> Full verification requires `tpm2-pytss` on the client side (`pip install tpm2-pytss` + `sudo apt install libtss2-dev`). It is optional — the attestation is informational unless your deployment policy requires verification. - -### Behaviour per security tier - -| Tier | TPM unavailable | -|------|----------------| -| `standard` | `tpm_attestation: {"is_available": false}` — request proceeds | -| `high` | same as standard | -| `maximum` | `ServiceUnavailableError` (HTTP 503) — request rejected | - -For `maximum` tier, the server enforces TPM availability as a hard requirement. 
If your server has no TPM and you request `maximum`, catch the error explicitly: - -```python -from nomyo import ServiceUnavailableError - -try: - response = await client.create(..., security_tier="maximum") -except ServiceUnavailableError as e: - print("Server does not meet TPM requirements for maximum tier:", e) -``` - ## Compliance Considerations ### HIPAA Compliance @@ -282,11 +207,9 @@ response = await client.create( messages=[{"role": "user", "content": "Hello"}] ) -print(response["_metadata"]) # Contains security_tier, memory_protection, tpm_attestation, etc. +print(response["_metadata"]) # Contains security-related information ``` -See [Hardware Attestation](#hardware-attestation-tpm-20) for details on the `tpm_attestation` field. - ### Logging Enable logging to see security operations: diff --git a/nomyo/SecureCompletionClient.py b/nomyo/SecureCompletionClient.py index 9c503a8..ee81942 100644 --- a/nomyo/SecureCompletionClient.py +++ b/nomyo/SecureCompletionClient.py @@ -1,5 +1,5 @@ -import asyncio, ctypes, json, base64, urllib.parse, httpx, os, secrets, sys, warnings, logging -from typing import Dict, Any, Optional, Union +import ctypes, json, base64, urllib.parse, httpx, os, secrets, sys, warnings, logging +from typing import Dict, Any, Optional from cryptography.hazmat.primitives import serialization, hashes from cryptography.hazmat.primitives.asymmetric import rsa, padding from cryptography.hazmat.backends import default_backend @@ -76,7 +76,7 @@ class SecureCompletionClient: - Response parsing """ - def __init__(self, router_url: str = "https://api.nomyo.ai", allow_http: bool = False, secure_memory: bool = True, max_retries: int = 2): + def __init__(self, router_url: str = "https://api.nomyo.ai", allow_http: bool = False, secure_memory: bool = True): """ Initialize the secure completion client. 
@@ -84,9 +84,6 @@ class SecureCompletionClient: router_url: Base URL of the NOMYO Router (must use HTTPS for production) allow_http: Allow HTTP connections (ONLY for local development, never in production) secure_memory: Whether to use secure memory operations for this instance. - max_retries: Number of retries on retryable errors (429, 500, 502, 503, 504, - network errors). Uses exponential backoff. Default 2, matching - the OpenAI Python SDK default. """ self.router_url = router_url.rstrip('/') self.private_key = None @@ -94,7 +91,6 @@ class SecureCompletionClient: self.key_size = 4096 # RSA key size self.allow_http = allow_http # Store for use in fetch_server_public_key self._use_secure_memory = _SECURE_MEMORY_AVAILABLE and secure_memory - self.max_retries = max_retries # Validate HTTPS for security if not self.router_url.startswith("https://"): @@ -301,7 +297,7 @@ class SecureCompletionClient: verify_ssl = self.router_url.startswith("https://") async with httpx.AsyncClient( - timeout=900.0, + timeout=60.0, verify=verify_ssl, # Verify SSL/TLS certificates for HTTPS ) as client: response = await client.get(url) @@ -663,22 +659,13 @@ class SecureCompletionClient: url = f"{self.router_url}/v1/chat/secure_completion" logger.debug("Target URL: %s", url) - _RETRYABLE_STATUS_CODES = {429, 500, 502, 503, 504} - last_exc: Exception = APIConnectionError("Request failed") - - for attempt in range(self.max_retries + 1): - if attempt > 0: - delay = 2 ** (attempt - 1) # 1s, 2s, 4s, … - logger.warning("Retrying request (attempt %d/%d) after %.1fs...", attempt, self.max_retries, delay) - await asyncio.sleep(delay) - - try: - async with httpx.AsyncClient(timeout=900.0) as client: - response = await client.post( - url, - headers=headers, - content=encrypted_payload - ) + try: + async with httpx.AsyncClient(timeout=60.0) as client: + response = await client.post( + url, + headers=headers, + content=encrypted_payload + ) logger.debug("HTTP Status: %d", response.status_code) @@ 
-689,6 +676,7 @@ class SecureCompletionClient: return decrypted_response elif response.status_code == 400: + # Bad request try: error = response.json() raise InvalidRequestError( @@ -700,6 +688,7 @@ class SecureCompletionClient: raise InvalidRequestError("Bad request: Invalid response format") elif response.status_code == 401: + # Unauthorized - authentication failed try: error = response.json() error_message = error.get('detail', 'Invalid API key or authentication failed') @@ -712,6 +701,7 @@ class SecureCompletionClient: raise AuthenticationError("Invalid API key or authentication failed") elif response.status_code == 403: + # Forbidden - model not allowed for security tier try: error = response.json() raise ForbiddenError( @@ -723,6 +713,7 @@ class SecureCompletionClient: raise ForbiddenError("Forbidden: Model not allowed for the requested security tier") elif response.status_code == 404: + # Endpoint not found try: error = response.json() raise APIError( @@ -733,47 +724,44 @@ class SecureCompletionClient: except (json.JSONDecodeError, ValueError): raise APIError("Endpoint not found: Secure inference not enabled") - elif response.status_code in _RETRYABLE_STATUS_CODES: + elif response.status_code == 429: + # Rate limit exceeded try: error = response.json() - if not isinstance(error, dict): - error = {"detail": "unknown"} - detail_msg = error.get("detail", "unknown") - except (json.JSONDecodeError, ValueError): - error = {} - detail_msg = "unknown" - - if response.status_code == 429: - last_exc = RateLimitError( - f"Rate limit exceeded: {detail_msg}", + raise RateLimitError( + f"Rate limit exceeded: {error.get('detail', 'Too many requests')}", status_code=429, error_details=error ) - elif response.status_code == 500: - last_exc = ServerError( - f"Server error: {detail_msg}", + except (json.JSONDecodeError, ValueError): + raise RateLimitError("Rate limit exceeded: Too many requests") + + elif response.status_code == 500: + # Server error + try: + error = 
response.json() + raise ServerError( + f"Server error: {error.get('detail', 'Internal server error')}", status_code=500, error_details=error ) - elif response.status_code == 503: - last_exc = ServiceUnavailableError( - f"Service unavailable: {detail_msg}", + except (json.JSONDecodeError, ValueError): + raise ServerError("Server error: Internal server error") + + elif response.status_code == 503: + # Service unavailable - inference backend is down + try: + error = response.json() + raise ServiceUnavailableError( + f"Service unavailable: {error.get('detail', 'Inference backend is unavailable')}", status_code=503, error_details=error ) - else: - last_exc = APIError( - f"Unexpected status code: {response.status_code} {detail_msg}", - status_code=response.status_code, - error_details=error - ) - - if attempt < self.max_retries: - logger.warning("Got retryable status %d: %s", response.status_code, detail_msg) - continue - raise last_exc + except (json.JSONDecodeError, ValueError): + raise ServiceUnavailableError("Service unavailable: Inference backend is unavailable") else: + # Unexpected status code try: unexp_detail = response.json() if not isinstance(unexp_detail, dict): @@ -786,17 +774,13 @@ class SecureCompletionClient: status_code=response.status_code ) - except httpx.NetworkError as e: - last_exc = APIConnectionError(f"Failed to connect to router: {e}") - if attempt < self.max_retries: - logger.warning("Network error on attempt %d: %s", attempt, e) - continue - raise last_exc - except (SecurityError, APIError, AuthenticationError, InvalidRequestError, ForbiddenError, RateLimitError, ServerError, ServiceUnavailableError, APIConnectionError): - raise # Non-retryable — propagate immediately - except Exception: - logger.exception("Unexpected error in send_secure_request") - raise APIConnectionError("Request failed due to an unexpected error") + except httpx.NetworkError as e: + raise APIConnectionError(f"Failed to connect to router: {e}") + except (SecurityError, 
APIError, AuthenticationError, InvalidRequestError, ForbiddenError, RateLimitError, ServerError, ServiceUnavailableError, APIConnectionError): + raise # Re-raise known exceptions + except Exception: + logger.exception("Unexpected error in send_secure_request") + raise APIConnectionError("Request failed due to an unexpected error") def _validate_rsa_key(self, key, key_type: str = "private") -> None: """ diff --git a/nomyo/__init__.py b/nomyo/__init__.py index 38d9bef..0a81157 100644 --- a/nomyo/__init__.py +++ b/nomyo/__init__.py @@ -51,6 +51,6 @@ try: except ImportError: pass -__version__ = "0.2.9" +__version__ = "0.2.5" __author__ = "NOMYO AI" __license__ = "Apache-2.0" diff --git a/nomyo/nomyo.py b/nomyo/nomyo.py index 95709b2..682997b 100644 --- a/nomyo/nomyo.py +++ b/nomyo/nomyo.py @@ -52,7 +52,7 @@ class SecureChatCompletion: ``` """ - def __init__(self, base_url: str = "https://api.nomyo.ai", allow_http: bool = False, api_key: Optional[str] = None, secure_memory: bool = True, key_dir: Optional[str] = None, max_retries: int = 2): + def __init__(self, base_url: str = "https://api.nomyo.ai", allow_http: bool = False, api_key: Optional[str] = None, secure_memory: bool = True, key_dir: Optional[str] = None): """ Initialize the secure chat completion client. @@ -68,10 +68,8 @@ class SecureChatCompletion: Set to False for testing or when security is not required. key_dir: Directory to load/save RSA keys. If None, ephemeral keys are generated in memory for this session only. - max_retries: Number of retries on retryable errors (429, 500, 502, 503, 504, - network errors). Uses exponential backoff. Default 2. 
""" - self.client = SecureCompletionClient(router_url=base_url, allow_http=allow_http, secure_memory=secure_memory, max_retries=max_retries) + self.client = SecureCompletionClient(router_url=base_url, allow_http=allow_http, secure_memory=secure_memory) self._keys_initialized = False self._keys_lock = asyncio.Lock() self.api_key = api_key diff --git a/pyproject.toml b/pyproject.toml index c42fef8..e0902c3 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,10 +1,10 @@ [build-system] -requires = ["hatchling>=1.0.0", "wheel"] -build-backend = "hatchling.build" +requires = ["setuptools>=68", "wheel", "Cython>=3.0"] +build-backend = "setuptools.build_meta" [project] name = "nomyo" -version = "0.2.9" +version = "0.2.5" description = "OpenAI-compatible secure chat client with end-to-end encryption for NOMYO Inference Endpoints" authors = [ {name = "NOMYO.AI", email = "ichi@nomyo.ai"}, @@ -27,16 +27,16 @@ classifiers = [ ] requires-python = ">=3.10" dependencies = [ - "anyio==4.13.0", - "certifi==2026.5.20", + "anyio==4.12.0", + "certifi==2025.11.12", "cffi==2.0.0", - "cryptography==48.0.0", + "cryptography==46.0.6", "exceptiongroup==1.3.1", "h11==0.16.0", "httpcore==1.0.9", "httpx==0.28.1", - "idna==3.16", - "pycparser==3.0", + "idna==3.11", + "pycparser==2.23", "typing_extensions==4.15.0", ] @@ -46,8 +46,5 @@ Documentation = "https://bitfreedom.net/code/nomyo-ai/nomyo/wiki/NOMYO-Secure-Cl Repository = "https://bitfreedom.net/code/nomyo-ai/nomyo" Issues = "https://bitfreedom.net/code/nomyo-ai/nomyo/issues" -[tool.hatch.build.targets.wheel] -packages = ["nomyo"] - -[tool.hatch.build.targets.sdist] -exclude = ["test/", "build.sh", "dist/"] +[tool.setuptools.packages.find] +include = ["nomyo*"] diff --git a/renovate.json b/renovate.json deleted file mode 100644 index 6af7325..0000000 --- a/renovate.json +++ /dev/null @@ -1,6 +0,0 @@ -{ - "$schema": "https://docs.renovatebot.com/renovate-schema.json", - "extends": [ - "local>nomyo-ai/renovate-config" - ] -} diff --git 
a/requirements.txt b/requirements.txt index 2e17834..868c9f0 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,11 +1,11 @@ -anyio==4.13.0 -certifi==2026.5.20 +anyio==4.12.0 +certifi==2025.11.12 cffi==2.0.0 -cryptography==48.0.0 +cryptography==46.0.6 exceptiongroup==1.3.1 h11==0.16.0 httpcore==1.0.9 httpx==0.28.1 -idna==3.16 -pycparser==3.0 +idna==3.11 +pycparser==2.23 typing_extensions==4.15.0 diff --git a/setup.py b/setup.py new file mode 100644 index 0000000..92d0bd3 --- /dev/null +++ b/setup.py @@ -0,0 +1,31 @@ +from setuptools import setup +from setuptools.command.build_py import build_py as _build_py +from Cython.Build import cythonize + +# Modules compiled to .so — exclude their .py source from the wheel +COMPILED_MODULES = {"nomyo", "SecureCompletionClient", "SecureMemory"} + + +class BuildPyNoPy(_build_py): + """Skip copying .py source files for cythonized modules.""" + + def find_package_modules(self, package, package_dir): + modules = super().find_package_modules(package, package_dir) + return [ + (pkg, mod, path) + for pkg, mod, path in modules + if not (pkg == "nomyo" and mod in COMPILED_MODULES) + ] + + +setup( + ext_modules=cythonize( + [ + "nomyo/nomyo.py", + "nomyo/SecureCompletionClient.py", + "nomyo/SecureMemory.py", + ], + compiler_directives={"language_level": "3"}, + ), + cmdclass={"build_py": BuildPyNoPy}, +)