diff --git a/.forgejo/workflows/publish.yml b/.forgejo/workflows/publish.yml
index 9fdee68..17f700c 100644
--- a/.forgejo/workflows/publish.yml
+++ b/.forgejo/workflows/publish.yml
@@ -7,10 +7,16 @@ on:
   workflow_dispatch:
 
 jobs:
-  publish:
-    runs-on: docker-amd64
+  build-and-publish:
+    name: Build & Publish (${{ matrix.runner }}, py${{ matrix.python }})
+    runs-on: ${{ matrix.runner }}
     container:
-      image: python:3.12-bookworm
+      image: python:${{ matrix.python }}-bookworm
+
+    strategy:
+      matrix:
+        python: ["3.10", "3.11", "3.12"]
+        runner: [docker-amd64, docker-arm64]
 
     steps:
       - name: Checkout repository
@@ -20,13 +26,21 @@ jobs:
             .
 
       - name: Install build tools
-        run: pip install build twine
+        run: |
+          apt-get update -qq && apt-get install -y patchelf
+          pip install build Cython twine auditwheel
 
-      - name: Build package
-        run: python -m build
+      - name: Build wheel
+        run: python -m build --wheel
+
+      - name: Repair wheel to manylinux
+        run: auditwheel repair dist/*.whl --wheel-dir wheelhouse/
+
+      - name: Check wheel metadata
+        run: twine check wheelhouse/*.whl
 
       - name: Publish to PyPI
         env:
           TWINE_USERNAME: __token__
           TWINE_PASSWORD: ${{ secrets.PYPI_API_TOKEN }}
-        run: twine upload dist/*
+        run: twine upload --verbose --skip-existing wheelhouse/*.whl
diff --git a/README.md b/README.md
index 32c6c81..1846d2e 100644
--- a/README.md
+++ b/README.md
@@ -10,17 +10,6 @@
 
 ## 🚀 Quick Start
 
-### 0. Try It Now (Demo Credentials)
-
-No account needed — use these public demo credentials to test immediately:
-
-| | |
-|---|---|
-| **API key** | `NOMYO_AI_E2EE_INFERENCE` |
-| **Model** | `Qwen/Qwen3-0.6B` |
-
-> **Note:** The demo endpoint uses a fixed 256-token context window and is intended for evaluation only.
-
 ### 1. Install methods
 
 via pip (recommended):
@@ -360,8 +349,7 @@ SecureChatCompletion(
     base_url: str = "https://api.nomyo.ai",
     allow_http: bool = False,
     api_key: Optional[str] = None,
-    secure_memory: bool = True,
-    max_retries: int = 2
+    secure_memory: bool = True
 )
 ```
 
@@ -371,7 +359,6 @@ SecureChatCompletion(
 - `allow_http`: Allow HTTP connections (ONLY for local development, never in production)
 - `api_key`: Optional API key for bearer authentication
 - `secure_memory`: Enable secure memory protection (default: True)
-- `max_retries`: Retries on retryable errors (429, 500, 502, 503, 504, network errors) with exponential backoff. Default: 2
 
 #### Methods
 
@@ -383,7 +370,7 @@ SecureChatCompletion(
 #### Constructor
 
 ```python
-SecureCompletionClient(router_url: str = "https://api.nomyo.ai", allow_http: bool = False, max_retries: int = 2)
+SecureCompletionClient(router_url: str = "https://api.nomyo.ai", allow_http: bool = False)
 ```
 
 #### Methods
diff --git a/doc/api-reference.md b/doc/api-reference.md
index 1069082..439e471 100644
--- a/doc/api-reference.md
+++ b/doc/api-reference.md
@@ -11,8 +11,7 @@ SecureChatCompletion(
     base_url: str = "https://api.nomyo.ai",
     allow_http: bool = False,
     api_key: Optional[str] = None,
-    secure_memory: bool = True,
-    max_retries: int = 2
+    secure_memory: bool = True
 )
 ```
 
@@ -22,7 +21,6 @@ SecureChatCompletion(
 - `allow_http` (bool): Allow HTTP connections (ONLY for local development, never in production)
 - `api_key` (Optional[str]): Optional API key for bearer authentication
 - `secure_memory` (bool): Enable secure memory protection (default: True)
-- `max_retries` (int): Number of retries on retryable errors (429, 500, 502, 503, 504, network errors). Uses exponential backoff. Default: 2
 
 ### Methods
 
@@ -75,30 +73,10 @@ A dictionary containing the chat completion response with the following structur
         "prompt_tokens": int,
         "completion_tokens": int,
         "total_tokens": int
-    },
-    "_metadata": {
-        "payload_id": str,
-        "processed_at": int,          # Unix timestamp
-        "is_encrypted": bool,
-        "response_status": str,
-        "security_tier": str,         # "standard", "high", or "maximum"
-        "memory_protection": dict,    # server-side memory protection info
-        "cuda_device": dict,          # privacy-safe GPU info (hashed identifiers)
-        "tpm_attestation": {          # TPM 2.0 hardware attestation (see Security Guide)
-            "is_available": bool,
-            # Present only when is_available is True:
-            "pcr_banks": str,         # e.g. "sha256:0,7,10"
-            "pcr_values": dict,       # {bank: {pcr_index: hex_digest}}
-            "quote_b64": str,         # base64-encoded TPMS_ATTEST (signed by AIK)
-            "signature_b64": str,     # base64-encoded TPMT_SIGNATURE
-            "aik_pubkey_b64": str,    # base64-encoded TPM2B_PUBLIC (ephemeral AIK)
-        }
     }
 }
 ```
 
-The `_metadata` field is added by the client library and is not part of the OpenAI API response format. See the [Security Guide](security-guide.md) for how to interpret and verify `tpm_attestation`.
-
 #### acreate(model, messages, **kwargs)
 
 Async alias for create() method.
@@ -114,18 +92,13 @@ The `SecureCompletionClient` class handles the underlying encryption, key manage
 ### Constructor
 
 ```python
-SecureCompletionClient(
-    router_url: str = "https://api.nomyo.ai",
-    allow_http: bool = False,
-    max_retries: int = 2
-)
+SecureCompletionClient(router_url: str = "https://api.nomyo.ai", allow_http: bool = False)
 ```
 
 **Parameters:**
 
 - `router_url` (str): Base URL of the NOMYO Router (must use HTTPS for production)
 - `allow_http` (bool): Allow HTTP connections (ONLY for local development, never in production)
-- `max_retries` (int): Number of retries on retryable errors (429, 500, 502, 503, 504, network errors). Uses exponential backoff. Default: 2
 
 ### Methods
 
diff --git a/doc/getting-started.md b/doc/getting-started.md
index 1cf78a4..4ccdf82 100644
--- a/doc/getting-started.md
+++ b/doc/getting-started.md
@@ -1,33 +1,5 @@
 # Getting Started
 
-## Try It Now (Demo Credentials)
-
-You can test the client immediately using these public demo credentials — no sign-up required:
-
-| | |
-|---|---|
-| **API key** | `NOMYO_AI_E2EE_INFERENCE` |
-| **Model** | `Qwen/Qwen3-0.6B` |
-
-> **Note:** The demo endpoint uses a fixed 256-token context window and is intended for evaluation only.
-
-```python
-import asyncio
-from nomyo import SecureChatCompletion
-
-async def main():
-    client = SecureChatCompletion(api_key="NOMYO_AI_E2EE_INFERENCE")
-
-    response = await client.create(
-        model="Qwen/Qwen3-0.6B",
-        messages=[{"role": "user", "content": "Hello!"}]
-    )
-
-    print(response['choices'][0]['message']['content'])
-
-asyncio.run(main())
-```
-
 ## Basic Usage
 
 The NOMYO client provides end-to-end encryption (E2E) for all communications between your application and the NOMYO inference endpoints. This ensures that your prompts and responses are protected from unauthorized access or interception.
diff --git a/doc/rate-limits.md b/doc/rate-limits.md
index 2c18da2..7d9f85c 100644
--- a/doc/rate-limits.md
+++ b/doc/rate-limits.md
@@ -48,14 +48,20 @@ HTTP/1.1 503 Service Unavailable
 - **Implement exponential backoff** when you receive a `429` response. Start with a short delay (e.g. 500 ms) and double it on each subsequent failure, up to a reasonable maximum.
 - **Monitor for `503` responses** — repeated occurrences indicate that your usage pattern is triggering the abuse threshold. Refactor your request logic before the cool-down expires.
 
-## Retry Behaviour
-
-The client retries automatically on `429`, `500`, `502`, `503`, `504`, and network errors using exponential backoff (1 s, 2 s, …). The default is **2 retries**. You can raise or disable this per client:
+## Example: Exponential Backoff
 
 ```python
-# More retries for high-throughput workloads
-client = SecureChatCompletion(api_key="...", max_retries=5)
+import asyncio
+from nomyo import RateLimitError
 
-# Disable retries entirely
-client = SecureChatCompletion(api_key="...", max_retries=0)
+async def request_with_backoff(client, *args, max_retries=5, **kwargs):
+    """Retry client.create() with exponential backoff; a 429 raises RateLimitError."""
+    delay = 0.5
+    for _ in range(max_retries):
+        try:
+            return await client.create(*args, **kwargs)
+        except RateLimitError:
+            await asyncio.sleep(delay)
+            delay = min(delay * 2, 30)
+    raise RuntimeError("Rate limit exceeded after maximum retries")
 ```
diff --git a/doc/security-guide.md b/doc/security-guide.md
index 6e4abdc..6c34f71 100644
--- a/doc/security-guide.md
+++ b/doc/security-guide.md
@@ -162,81 +162,6 @@ Secure memory features:
 - Guarantees zeroing of sensitive memory
 - Prevents memory dumps from containing sensitive data
 
-## Hardware Attestation (TPM 2.0)
-
-### What it is
-
-When the server has a TPM 2.0 chip, every response includes a `tpm_attestation` block in `_metadata`. This is a cryptographically signed hardware quote proving:
-
-- Which firmware and Secure Boot state the server is running (PCR 0, 7)
-- Which application binary is running, when IMA is active (PCR 10)
-
-The quote is signed by an ephemeral AIK (Attestation Identity Key) generated fresh for each request and tied to the `payload_id` nonce, so it cannot be replayed for a different request.
-
-### Reading the attestation
-
-```python
-response = await client.create(
-    model="Qwen/Qwen3-0.6B",
-    messages=[{"role": "user", "content": "..."}],
-    security_tier="maximum"
-)
-
-tpm = response["_metadata"].get("tpm_attestation", {})
-
-if tpm.get("is_available"):
-    print("PCR banks:", tpm["pcr_banks"])         # e.g. "sha256:0,7,10"
-    print("PCR values:", tpm["pcr_values"])        # {bank: {index: hex}}
-    print("AIK key:", tpm["aik_pubkey_b64"][:32], "...")
-else:
-    print("TPM not available on this server")
-```
-
-### Verifying the quote
-
-The response is self-contained: `aik_pubkey_b64` is the full public key of the AIK that signed the quote, so no separate key-fetch round-trip is needed.
-
-Verification steps using `tpm2-pytss`:
-
-```python
-import base64
-from tpm2_pytss.types import TPM2B_PUBLIC, TPMT_SIGNATURE, TPM2B_ATTEST
-
-# 1. Decode the quote components
-aik_pub = TPM2B_PUBLIC.unmarshal(base64.b64decode(tpm["aik_pubkey_b64"]))[0]
-quote   = TPM2B_ATTEST.unmarshal(base64.b64decode(tpm["quote_b64"]))[0]
-sig     = TPMT_SIGNATURE.unmarshal(base64.b64decode(tpm["signature_b64"]))[0]
-
-# 2. Verify the signature over the quote using the AIK public key
-#    (use a TPM ESAPI verify_signature call or an offline RSA verify)
-
-# 3. Inspect the qualifying_data inside the quote — it must match
-#    SHA-256(payload_id.encode())[:16] to confirm this quote is for this request
-
-# 4. Check pcr_values against your known-good baseline
-```
-
-> Full verification requires `tpm2-pytss` on the client side (`pip install tpm2-pytss` + `sudo apt install libtss2-dev`). It is optional — the attestation is informational unless your deployment policy requires verification.
-
-### Behaviour per security tier
-
-| Tier | TPM unavailable |
-|------|----------------|
-| `standard` | `tpm_attestation: {"is_available": false}` — request proceeds |
-| `high` | same as standard |
-| `maximum` | `ServiceUnavailableError` (HTTP 503) — request rejected |
-
-For `maximum` tier, the server enforces TPM availability as a hard requirement. If your server has no TPM and you request `maximum`, catch the error explicitly:
-
-```python
-from nomyo import ServiceUnavailableError
-
-try:
-    response = await client.create(..., security_tier="maximum")
-except ServiceUnavailableError as e:
-    print("Server does not meet TPM requirements for maximum tier:", e)
-```
-
 ## Compliance Considerations
 
 ### HIPAA Compliance
@@ -282,11 +207,9 @@ response = await client.create(
     messages=[{"role": "user", "content": "Hello"}]
 )
 
-print(response["_metadata"])  # Contains security_tier, memory_protection, tpm_attestation, etc.
+print(response["_metadata"])  # Contains security-related information
 ```
 
-See [Hardware Attestation](#hardware-attestation-tpm-20) for details on the `tpm_attestation` field.
-
 ### Logging
 
 Enable logging to see security operations:
diff --git a/nomyo/SecureCompletionClient.py b/nomyo/SecureCompletionClient.py
index 9c503a8..ee81942 100644
--- a/nomyo/SecureCompletionClient.py
+++ b/nomyo/SecureCompletionClient.py
@@ -1,5 +1,5 @@
-import asyncio, ctypes, json, base64, urllib.parse, httpx, os, secrets, sys, warnings, logging
-from typing import Dict, Any, Optional, Union
+import ctypes, json, base64, urllib.parse, httpx, os, secrets, sys, warnings, logging
+from typing import Dict, Any, Optional
 from cryptography.hazmat.primitives import serialization, hashes
 from cryptography.hazmat.primitives.asymmetric import rsa, padding
 from cryptography.hazmat.backends import default_backend
@@ -76,7 +76,7 @@ class SecureCompletionClient:
     - Response parsing
     """
 
-    def __init__(self, router_url: str = "https://api.nomyo.ai", allow_http: bool = False, secure_memory: bool = True, max_retries: int = 2):
+    def __init__(self, router_url: str = "https://api.nomyo.ai", allow_http: bool = False, secure_memory: bool = True):
         """
         Initialize the secure completion client.
 
@@ -84,9 +84,6 @@ class SecureCompletionClient:
             router_url: Base URL of the NOMYO Router (must use HTTPS for production)
             allow_http: Allow HTTP connections (ONLY for local development, never in production)
             secure_memory: Whether to use secure memory operations for this instance.
-            max_retries: Number of retries on retryable errors (429, 500, 502, 503, 504,
-                         network errors). Uses exponential backoff. Default 2, matching
-                         the OpenAI Python SDK default.
         """
         self.router_url = router_url.rstrip('/')
         self.private_key = None
@@ -94,7 +91,6 @@ class SecureCompletionClient:
         self.key_size = 4096  # RSA key size
         self.allow_http = allow_http  # Store for use in fetch_server_public_key
         self._use_secure_memory = _SECURE_MEMORY_AVAILABLE and secure_memory
-        self.max_retries = max_retries
 
         # Validate HTTPS for security
         if not self.router_url.startswith("https://"):
@@ -301,7 +297,7 @@ class SecureCompletionClient:
             verify_ssl = self.router_url.startswith("https://")
 
             async with httpx.AsyncClient(
-                timeout=900.0,
+                timeout=60.0,
                 verify=verify_ssl,  # Verify SSL/TLS certificates for HTTPS
             ) as client:
                 response = await client.get(url)
@@ -663,22 +659,13 @@ class SecureCompletionClient:
         url = f"{self.router_url}/v1/chat/secure_completion"
         logger.debug("Target URL: %s", url)
 
-        _RETRYABLE_STATUS_CODES = {429, 500, 502, 503, 504}
-        last_exc: Exception = APIConnectionError("Request failed")
-
-        for attempt in range(self.max_retries + 1):
-            if attempt > 0:
-                delay = 2 ** (attempt - 1)  # 1s, 2s, 4s, …
-                logger.warning("Retrying request (attempt %d/%d) after %.1fs...", attempt, self.max_retries, delay)
-                await asyncio.sleep(delay)
-
-            try:
-                async with httpx.AsyncClient(timeout=900.0) as client:
-                    response = await client.post(
-                        url,
-                        headers=headers,
-                        content=encrypted_payload
-                    )
+        try:
+            async with httpx.AsyncClient(timeout=60.0) as client:
+                response = await client.post(
+                    url,
+                    headers=headers,
+                    content=encrypted_payload
+                )
 
                 logger.debug("HTTP Status: %d", response.status_code)
 
@@ -689,6 +676,7 @@ class SecureCompletionClient:
                     return decrypted_response
 
                 elif response.status_code == 400:
+                    # Bad request
                     try:
                         error = response.json()
                         raise InvalidRequestError(
@@ -700,6 +688,7 @@ class SecureCompletionClient:
                         raise InvalidRequestError("Bad request: Invalid response format")
 
                 elif response.status_code == 401:
+                    # Unauthorized - authentication failed
                     try:
                         error = response.json()
                         error_message = error.get('detail', 'Invalid API key or authentication failed')
@@ -712,6 +701,7 @@ class SecureCompletionClient:
                         raise AuthenticationError("Invalid API key or authentication failed")
 
                 elif response.status_code == 403:
+                    # Forbidden - model not allowed for security tier
                     try:
                         error = response.json()
                         raise ForbiddenError(
@@ -723,6 +713,7 @@ class SecureCompletionClient:
                         raise ForbiddenError("Forbidden: Model not allowed for the requested security tier")
 
                 elif response.status_code == 404:
+                    # Endpoint not found
                     try:
                         error = response.json()
                         raise APIError(
@@ -733,47 +724,44 @@ class SecureCompletionClient:
                     except (json.JSONDecodeError, ValueError):
                         raise APIError("Endpoint not found: Secure inference not enabled")
 
-                elif response.status_code in _RETRYABLE_STATUS_CODES:
+                elif response.status_code == 429:
+                    # Rate limit exceeded
                     try:
                         error = response.json()
-                        if not isinstance(error, dict):
-                            error = {"detail": "unknown"}
-                        detail_msg = error.get("detail", "unknown")
-                    except (json.JSONDecodeError, ValueError):
-                        error = {}
-                        detail_msg = "unknown"
-
-                    if response.status_code == 429:
-                        last_exc = RateLimitError(
-                            f"Rate limit exceeded: {detail_msg}",
+                        raise RateLimitError(
+                            f"Rate limit exceeded: {error.get('detail', 'Too many requests')}",
                             status_code=429,
                             error_details=error
                         )
-                    elif response.status_code == 500:
-                        last_exc = ServerError(
-                            f"Server error: {detail_msg}",
+                    except (json.JSONDecodeError, ValueError):
+                        raise RateLimitError("Rate limit exceeded: Too many requests")
+
+                elif response.status_code == 500:
+                    # Server error
+                    try:
+                        error = response.json()
+                        raise ServerError(
+                            f"Server error: {error.get('detail', 'Internal server error')}",
                             status_code=500,
                             error_details=error
                         )
-                    elif response.status_code == 503:
-                        last_exc = ServiceUnavailableError(
-                            f"Service unavailable: {detail_msg}",
+                    except (json.JSONDecodeError, ValueError):
+                        raise ServerError("Server error: Internal server error")
+
+                elif response.status_code == 503:
+                    # Service unavailable - inference backend is down
+                    try:
+                        error = response.json()
+                        raise ServiceUnavailableError(
+                            f"Service unavailable: {error.get('detail', 'Inference backend is unavailable')}",
                             status_code=503,
                             error_details=error
                         )
-                    else:
-                        last_exc = APIError(
-                            f"Unexpected status code: {response.status_code} {detail_msg}",
-                            status_code=response.status_code,
-                            error_details=error
-                        )
-
-                    if attempt < self.max_retries:
-                        logger.warning("Got retryable status %d: %s", response.status_code, detail_msg)
-                        continue
-                    raise last_exc
+                    except (json.JSONDecodeError, ValueError):
+                        raise ServiceUnavailableError("Service unavailable: Inference backend is unavailable")
 
                 else:
+                    # Unexpected status code
                     try:
                         unexp_detail = response.json()
                         if not isinstance(unexp_detail, dict):
@@ -786,17 +774,13 @@ class SecureCompletionClient:
                         status_code=response.status_code
                     )
 
-            except httpx.NetworkError as e:
-                last_exc = APIConnectionError(f"Failed to connect to router: {e}")
-                if attempt < self.max_retries:
-                    logger.warning("Network error on attempt %d: %s", attempt, e)
-                    continue
-                raise last_exc
-            except (SecurityError, APIError, AuthenticationError, InvalidRequestError, ForbiddenError, RateLimitError, ServerError, ServiceUnavailableError, APIConnectionError):
-                raise  # Non-retryable — propagate immediately
-            except Exception:
-                logger.exception("Unexpected error in send_secure_request")
-                raise APIConnectionError("Request failed due to an unexpected error")
+        except httpx.NetworkError as e:
+            raise APIConnectionError(f"Failed to connect to router: {e}")
+        except (SecurityError, APIError, AuthenticationError, InvalidRequestError, ForbiddenError, RateLimitError, ServerError, ServiceUnavailableError, APIConnectionError):
+            raise  # Re-raise known exceptions
+        except Exception:
+            logger.exception("Unexpected error in send_secure_request")
+            raise APIConnectionError("Request failed due to an unexpected error")
 
     def _validate_rsa_key(self, key, key_type: str = "private") -> None:
         """
diff --git a/nomyo/__init__.py b/nomyo/__init__.py
index 38d9bef..0a81157 100644
--- a/nomyo/__init__.py
+++ b/nomyo/__init__.py
@@ -51,6 +51,6 @@ try:
 except ImportError:
     pass
 
-__version__ = "0.2.9"
+__version__ = "0.2.5"
 __author__ = "NOMYO AI"
 __license__ = "Apache-2.0"
diff --git a/nomyo/nomyo.py b/nomyo/nomyo.py
index 95709b2..682997b 100644
--- a/nomyo/nomyo.py
+++ b/nomyo/nomyo.py
@@ -52,7 +52,7 @@ class SecureChatCompletion:
         ```
     """
 
-    def __init__(self, base_url: str = "https://api.nomyo.ai", allow_http: bool = False, api_key: Optional[str] = None, secure_memory: bool = True, key_dir: Optional[str] = None, max_retries: int = 2):
+    def __init__(self, base_url: str = "https://api.nomyo.ai", allow_http: bool = False, api_key: Optional[str] = None, secure_memory: bool = True, key_dir: Optional[str] = None):
         """
         Initialize the secure chat completion client.
 
@@ -68,10 +68,8 @@ class SecureChatCompletion:
                           Set to False for testing or when security is not required.
             key_dir: Directory to load/save RSA keys. If None, ephemeral keys are
                      generated in memory for this session only.
-            max_retries: Number of retries on retryable errors (429, 500, 502, 503, 504,
-                        network errors). Uses exponential backoff. Default 2.
         """
-        self.client = SecureCompletionClient(router_url=base_url, allow_http=allow_http, secure_memory=secure_memory, max_retries=max_retries)
+        self.client = SecureCompletionClient(router_url=base_url, allow_http=allow_http, secure_memory=secure_memory)
         self._keys_initialized = False
         self._keys_lock = asyncio.Lock()
         self.api_key = api_key
diff --git a/pyproject.toml b/pyproject.toml
index c42fef8..e0902c3 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -1,10 +1,10 @@
 [build-system]
-requires = ["hatchling>=1.0.0", "wheel"]
-build-backend = "hatchling.build"
+requires = ["setuptools>=68", "wheel", "Cython>=3.0"]
+build-backend = "setuptools.build_meta"
 
 [project]
 name = "nomyo"
-version = "0.2.9"
+version = "0.2.5"
 description = "OpenAI-compatible secure chat client with end-to-end encryption for NOMYO Inference Endpoints"
 authors = [
     {name = "NOMYO.AI", email = "ichi@nomyo.ai"},
@@ -27,16 +27,16 @@ classifiers = [
 ]
 requires-python = ">=3.10"
 dependencies = [
-    "anyio==4.13.0",
-    "certifi==2026.5.20",
+    "anyio==4.12.0",
+    "certifi==2025.11.12",
     "cffi==2.0.0",
-    "cryptography==48.0.0",
+    "cryptography==46.0.6",
     "exceptiongroup==1.3.1",
     "h11==0.16.0",
     "httpcore==1.0.9",
     "httpx==0.28.1",
-    "idna==3.16",
-    "pycparser==3.0",
+    "idna==3.11",
+    "pycparser==2.23",
     "typing_extensions==4.15.0",
 ]
 
@@ -46,8 +46,5 @@ Documentation = "https://bitfreedom.net/code/nomyo-ai/nomyo/wiki/NOMYO-Secure-Cl
 Repository = "https://bitfreedom.net/code/nomyo-ai/nomyo"
 Issues = "https://bitfreedom.net/code/nomyo-ai/nomyo/issues"
 
-[tool.hatch.build.targets.wheel]
-packages = ["nomyo"]
-
-[tool.hatch.build.targets.sdist]
-exclude = ["test/", "build.sh", "dist/"]
+[tool.setuptools.packages.find]
+include = ["nomyo*"]
diff --git a/renovate.json b/renovate.json
deleted file mode 100644
index 6af7325..0000000
--- a/renovate.json
+++ /dev/null
@@ -1,6 +0,0 @@
-{
-  "$schema": "https://docs.renovatebot.com/renovate-schema.json",
-  "extends": [
-    "local>nomyo-ai/renovate-config"
-  ]
-}
diff --git a/requirements.txt b/requirements.txt
index 2e17834..868c9f0 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -1,11 +1,11 @@
-anyio==4.13.0
-certifi==2026.5.20
+anyio==4.12.0
+certifi==2025.11.12
 cffi==2.0.0
-cryptography==48.0.0
+cryptography==46.0.6
 exceptiongroup==1.3.1
 h11==0.16.0
 httpcore==1.0.9
 httpx==0.28.1
-idna==3.16
-pycparser==3.0
+idna==3.11
+pycparser==2.23
 typing_extensions==4.15.0
diff --git a/setup.py b/setup.py
new file mode 100644
index 0000000..92d0bd3
--- /dev/null
+++ b/setup.py
@@ -0,0 +1,31 @@
+from setuptools import setup
+from setuptools.command.build_py import build_py as _build_py
+from Cython.Build import cythonize
+
+# Modules shipped only as compiled extensions; their .py sources stay out of the wheel.
+COMPILED_MODULES = {"nomyo", "SecureCompletionClient", "SecureMemory"}
+
+
+class BuildPyNoPy(_build_py):
+    """build_py variant that omits the .py sources of cythonized modules."""
+
+    def find_package_modules(self, package, package_dir):
+        def keep(entry):
+            # Drop only nomyo.<module> entries listed in COMPILED_MODULES.
+            pkg_name, module_name, _path = entry
+            return pkg_name != "nomyo" or module_name not in COMPILED_MODULES
+
+        return list(filter(keep, super().find_package_modules(package, package_dir)))
+
+
+# Cythonize the explicitly listed sources with Python 3 language semantics.
+extensions = cythonize(
+    [
+        "nomyo/nomyo.py",
+        "nomyo/SecureCompletionClient.py",
+        "nomyo/SecureMemory.py",
+    ],
+    compiler_directives={"language_level": "3"},
+)
+
+setup(ext_modules=extensions, cmdclass={"build_py": BuildPyNoPy})