fix: base_url

doc: created
2026-04-16 16:44:26 +02:00 · 2026-04-16 16:44:26 +02:00 · 43165f86f2
commit 43165f86f2
parent 6e02559f4e
17 changed files with 2151 additions and 293 deletions
--- a/CONTRIBUTING.md
+++ b/CONTRIBUTING.md
@ -0,0 +1,11 @@
 # Contributing
 ## Development Setup
 ```bash
 npm install
 npm run build
 npm test
 ```
 Node.js 18 LTS or later is required for tests and the TypeScript compiler.
--- a/README.md
+++ b/README.md
@ -1,13 +1,13 @@
-# NOMYO.js - Secure JavaScript Chat Client
+# NOMYO.js — Secure JavaScript Chat Client
 **OpenAI-compatible secure chat client with end-to-end encryption for NOMYO Inference Endpoints**
-🔒 **All prompts and responses are automatically encrypted and decrypted**  
+- All prompts and responses are automatically encrypted and decrypted
-🔑 **Uses hybrid encryption (AES-256-GCM + RSA-OAEP with 4096-bit keys)**  
+- Hybrid encryption: AES-256-GCM payload + RSA-OAEP-SHA256 key exchange, 4096-bit keys
-🔄 **Drop-in replacement for OpenAI's ChatCompletion API**  
+- Drop-in replacement for OpenAI's ChatCompletion API
-🌐 **Works in both Node.js and browsers**
+- Works in both Node.js and browsers
-## 🚀 Quick Start
+## Quick Start
 ### Installation
@ -20,371 +20,323 @@ npm install nomyo-js
 ```javascript
 import { SecureChatCompletion } from 'nomyo-js';
 // Initialize client (defaults to https://api.nomyo.ai:12434)
 const client = new SecureChatCompletion({
-  baseUrl: 'https://api.nomyo.ai:12434'
+  apiKey: process.env.NOMYO_API_KEY,
 });
 // Simple chat completion
 const response = await client.create({
  model: 'Qwen/Qwen3-0.6B',
-  messages: [
+  messages: [{ role: 'user', content: 'Hello!' }],
-    { role: 'user', content: 'Hello! How are you today?' }
+  temperature: 0.7,
  ],
  temperature: 0.7
 });
 console.log(response.choices[0].message.content);
 client.dispose();
 ```
 ### Basic Usage (Browser)
 ```html
-<!DOCTYPE html>
+<script type="module">
-<html>
+  import { SecureChatCompletion } from 'https://unpkg.com/nomyo-js/dist/browser/index.js';
 <head>
  <script type="module">
    import { SecureChatCompletion } from 'https://unpkg.com/nomyo-js/dist/browser/index.js';
-    const client = new SecureChatCompletion({
+  const client = new SecureChatCompletion({
-      baseUrl: 'https://api.nomyo.ai:12434'
+    baseUrl: 'https://api.nomyo.ai',
-    });
+    apiKey:  'your-api-key',
  });
-    const response = await client.create({
+  const response = await client.create({
-      model: 'Qwen/Qwen3-0.6B',
+    model: 'Qwen/Qwen3-0.6B',
-      messages: [
+    messages: [{ role: 'user', content: 'What is 2+2?' }],
-        { role: 'user', content: 'What is 2+2?' }
+  });
      ]
    });
-    console.log(response.choices[0].message.content);
+  console.log(response.choices[0].message.content);
-  </script>
+</script>
 </head>
 <body>
  <h1>NOMYO Secure Chat</h1>
 </body>
 </html>
 ```
-## 🔐 Security Features
+## Documentation
 Full documentation is in the [`doc/`](doc/) directory:
 - [Getting Started](doc/getting-started.md) — walkthrough for new users
 - [API Reference](doc/api-reference.md) — complete constructor options, methods, types, and error classes
 - [Models](doc/models.md) — available models and selection guide
 - [Security Guide](doc/security-guide.md) — encryption, memory protection, key management, compliance
 - [Rate Limits](doc/rate-limits.md) — limits, automatic retry behaviour, batch throttling
 - [Examples](doc/examples.md) — 12+ code examples for common scenarios
 - [Troubleshooting](doc/troubleshooting.md) — error reference and debugging tips
 ## Security Features
 ### Hybrid Encryption
-**Payload encryption**: AES-256-GCM (authenticated encryption)
+- **Payload encryption**: AES-256-GCM (authenticated encryption)
- **Key exchange**: RSA-OAEP with SHA-256
+- **Key exchange**: RSA-OAEP-SHA256
 - **Key size**: 4096-bit RSA keys
- **All communication**: End-to-end encrypted
+- **Scope**: All communication is end-to-end encrypted
 ### Key Management
- **Automatic key generation**: Keys are automatically generated on first use
+- **Automatic**: Keys are generated on first use and saved to `keyDir` (default: `client_keys/`). Existing keys are reloaded on subsequent runs. Node.js only.
- **Automatic key loading**: Existing keys are loaded automatically from `client_keys/` directory (Node.js only)
+- **Password protection**: Private key files can optionally be AES-encrypted with a password (minimum 8 characters).
- **No manual intervention required**: The library handles key management automatically
+- **Secure permissions**: Private key files saved at `0600` (owner-only).
- **Optional persistence**: Keys can be saved to `client_keys/` directory for reuse across sessions (Node.js only)
+- **Auto-rotation**: Keys rotate every 24 hours by default (configurable via `keyRotationInterval`).
- **Password protection**: Optional password encryption for private keys (minimum 8 characters required)
+- **Explicit lifecycle**: Call `dispose()` to zero in-memory key material and stop the rotation timer.
 - **Secure permissions**: Private keys stored with restricted permissions (600 - owner-only access)
 - **Automatic key rotation**: Keys are rotated on a configurable interval (default: 24 hours) to limit fingerprint lifetime
 - **Explicit lifecycle management**: Call `dispose()` to immediately zero in-memory key material and stop the rotation timer
-### Secure Memory Protection
+### Secure Memory
-> [!NOTE]
+The library wraps all intermediate sensitive buffers (AES keys, plaintext payload, decrypted bytes) in `SecureByteContext`, which zeroes them in a `finally` block immediately after use.
 > **Pure JavaScript Implementation**: This version uses pure JavaScript with immediate memory zeroing.
 > OS-level memory locking (`mlock`) is NOT available without a native addon.
 > For enhanced security in production, consider implementing the optional native addon (see `native/` directory).
- **Automatic cleanup**: Sensitive data is zeroed from memory immediately after use
+Pure JavaScript cannot lock pages to prevent OS swapping (`mlock`). For environments where swap-file exposure is unacceptable, install the optional `nomyo-native` addon. Check the current protection level:
 - **Best-effort protection**: Minimizes exposure time of sensitive data
 - **Fallback mechanism**: Graceful degradation if enhanced security is unavailable
 ## 🔄 OpenAI Compatibility
 The `SecureChatCompletion` class provides **exact API compatibility** with OpenAI's `ChatCompletion.create()` method.
 ### Supported Parameters
 All standard OpenAI parameters are supported:
 - `model`: Model identifier
 - `messages`: List of message objects
 - `temperature`: Sampling temperature (0-2)
 - `max_tokens`: Maximum tokens to generate
 - `top_p`: Nucleus sampling
 - `frequency_penalty`: Frequency penalty
 - `presence_penalty`: Presence penalty
 - `stop`: Stop sequences
 - `n`: Number of completions
 - `tools`: Tool definitions
 - `tool_choice`: Tool selection strategy
 - `user`: User identifier
 ### Response Format
 Responses follow the OpenAI format exactly, with an additional `_metadata` field for debugging and security information:
 ```javascript
-{
+import { getMemoryProtectionInfo } from 'nomyo-js';
-  "id": "chatcmpl-123",
+
-  "object": "chat.completion",
+const info = getMemoryProtectionInfo();
-  "created": 1234567890,
+// Without addon: { method: 'zero-only', canLock: false }
-  "model": "Qwen/Qwen3-0.6B",
+// With addon:    { method: 'mlock',     canLock: true  }
-  "choices": [
+```
-    {
+
-      "index": 0,
+### Security Tiers
-      "message": {
+
-        "role": "assistant",
+Pass `security_tier` per request to route inference to increasingly isolated hardware:
-        "content": "Hello! I'm doing well, thank you for asking."
+
-      },
+| Tier | Hardware | Use case |
-      "finish_reason": "stop"
+|------|----------|----------|
-    }
+| `"standard"` | GPU | General secure inference |
-  ],
+| `"high"` | CPU/GPU balanced | Sensitive business data |
-  "usage": {
+| `"maximum"` | CPU only | HIPAA PHI, classified data |
-    "prompt_tokens": 10,
+
-    "completion_tokens": 20,
+```javascript
-    "total_tokens": 30
+const response = await client.create({
-  },
+  model:         'Qwen/Qwen3-0.6B',
-  "_metadata": {
+  messages:      [{ role: 'user', content: 'Patient record summary...' }],
-    "payload_id": "openai-compat-abc123",
+  security_tier: 'maximum',
-    "processed_at": 1765250382,
+});
-    "is_encrypted": true,
+```
-    "encryption_algorithm": "hybrid-aes256-rsa4096",
+
-    "response_status": "success"
+## Usage Examples
-  }
+
 ### With API Key
 ```javascript
 const client = new SecureChatCompletion({ apiKey: process.env.NOMYO_API_KEY });
 ```
 ### Error Handling
 ```javascript
 import {
  SecureChatCompletion,
  AuthenticationError,
  RateLimitError,
  ForbiddenError,
 } from 'nomyo-js';
 try {
  const response = await client.create({ model: 'Qwen/Qwen3-0.6B', messages: [...] });
 } catch (err) {
  if (err instanceof AuthenticationError) console.error('Check API key:', err.message);
  else if (err instanceof RateLimitError)    console.error('Rate limit hit:', err.message);
  else if (err instanceof ForbiddenError)    console.error('Model/tier mismatch:', err.message);
  else throw err;
 }
 ```
-## 🛠️ Usage Examples
+### Per-Request Router Override
-### Basic Chat
+Send a single request to a different router without changing the main client:
 ```javascript
 import { SecureChatCompletion } from 'nomyo-js';
 const client = new SecureChatCompletion({
  baseUrl: 'https://api.nomyo.ai:12434'
 });
 const response = await client.create({
-  model: 'Qwen/Qwen3-0.6B',
+  model:    'Qwen/Qwen3-0.6B',
-  messages: [
+  messages: [{ role: 'user', content: 'Hello from secondary router' }],
-    { role: 'system', content: 'You are a helpful assistant.' },
+  base_url: 'https://secondary.nomyo.ai:12435',  // temporary — main client unchanged
    { role: 'user', content: 'What is the capital of France?' }
  ],
  temperature: 0.7
 });
 console.log(response.choices[0].message.content);
 ```
-### With Tools
+### Tool / Function Calling
 ```javascript
 const response = await client.create({
-  model: 'Qwen/Qwen3-0.6B',
+  model:    'Qwen/Qwen3-0.6B',
-  messages: [
+  messages: [{ role: 'user', content: "What's the weather in Paris?" }],
    { role: 'user', content: "What's the weather in Paris?" }
  ],
  tools: [
    {
      type: 'function',
      function: {
-        name: 'get_weather',
+        name:        'get_weather',
-        description: 'Get weather information',
+        description: 'Get weather information for a location',
        parameters: {
          type: 'object',
-          properties: {
+          properties: { location: { type: 'string' } },
-            location: { type: 'string' }
+          required: ['location'],
-          },
+        },
-          required: ['location']
+      },
-        }
+    },
-      }
+  ],
-    }
+  tool_choice: 'auto',
  ]
 });
 ```
-### With API Key Authentication
+### Thinking Models
 ```javascript
 const client = new SecureChatCompletion({
  baseUrl: 'https://api.nomyo.ai:12434',
  apiKey: 'your-api-key-here'
 });
 // API key will be automatically included in all requests
 const response = await client.create({
-  model: 'Qwen/Qwen3-0.6B',
+  model:    'LiquidAI/LFM2.5-1.2B-Thinking',
-  messages: [
+  messages: [{ role: 'user', content: 'Is 9.9 larger than 9.11?' }],
    { role: 'user', content: 'Hello!' }
  ]
 });
 ```
 ### Custom Key Management (Node.js)
 ```javascript
 import { SecureCompletionClient } from 'nomyo-js';
 const client = new SecureCompletionClient({
  routerUrl: 'https://api.nomyo.ai:12434'
 });
-// Generate keys with password protection
+const { content, reasoning_content } = response.choices[0].message;
-await client.generateKeys({
+console.log('Reasoning:', reasoning_content);
-  saveToFile: true,
+console.log('Answer:',    content);
  keyDir: 'client_keys',
  password: 'your-secure-password'
 });
 // Or load existing keys
 await client.loadKeys(
  'client_keys/private_key.pem',
  'client_keys/public_key.pem',
  'your-secure-password'
 );
 ```
 ### Resource Management
 Always call `dispose()` when finished to zero key material and stop the background rotation timer:
 ```javascript
-const client = new SecureChatCompletion({
+const client = new SecureChatCompletion({ apiKey: process.env.NOMYO_API_KEY });
  baseUrl: 'https://api.nomyo.ai:12434',
  keyRotationInterval: 3600000,  // rotate every hour
 });
 try {
  const response = await client.create({ model: 'Qwen/Qwen3-0.6B', messages: [...] });
  console.log(response.choices[0].message.content);
 } finally {
-  client.dispose();
+  client.dispose();  // zeros key material, stops rotation timer
 }
 ```
 To disable key rotation entirely (e.g. short-lived scripts):
 ```javascript
 const client = new SecureChatCompletion({
  baseUrl: 'https://api.nomyo.ai:12434',
  keyRotationInterval: 0,  // disabled
 });
 ```
 ## 🧪 Platform Support
 ### Node.js
 - **Minimum version**: Node.js 14.17+
 - **Recommended**: Node.js 18 LTS or later
 - **Key storage**: File system (`client_keys/` directory)
 - **Security**: Full implementation with automatic key persistence
 ### Browsers
 - **Supported browsers**: Modern browsers with Web Crypto API support
  - Chrome 37+
  - Firefox 34+
  - Safari 11+
  - Edge 79+
 - **Key storage**: In-memory only (keys not persisted for security)
 - **Security**: Best-effort memory protection (no OS-level locking)
 ## 📚 API Reference
 ### SecureChatCompletion
 #### Constructor
 ```typescript
 new SecureChatCompletion(config?: {
  baseUrl?: string;                // Default: 'https://api.nomyo.ai:12434'
  allowHttp?: boolean;             // Default: false
  apiKey?: string;                 // Default: undefined
  secureMemory?: boolean;          // Default: true
  timeout?: number;                // Request timeout ms. Default: 60000
  debug?: boolean;                 // Enable verbose logging. Default: false
  keyRotationInterval?: number;    // Key rotation ms. 0 = disabled. Default: 86400000 (24h)
  keyRotationDir?: string;         // Directory for rotated keys. Default: 'client_keys'
  keyRotationPassword?: string;    // Password for rotated key files
 })
 ```
 #### Methods
 - `create(request: ChatCompletionRequest): Promise<ChatCompletionResponse>`
 - `acreate(request: ChatCompletionRequest): Promise<ChatCompletionResponse>` (alias)
 - `dispose(): void` — zero key material and stop rotation timer
 ### SecureCompletionClient
 Lower-level API for advanced use cases.
 #### Constructor
 ```typescript
 new SecureCompletionClient(config?: {
  routerUrl?: string;              // Default: 'https://api.nomyo.ai:12434'
  allowHttp?: boolean;             // Default: false
  secureMemory?: boolean;          // Default: true
  keySize?: 2048 | 4096;          // Default: 4096
  timeout?: number;                // Request timeout ms. Default: 60000
  debug?: boolean;                 // Enable verbose logging. Default: false
  keyRotationInterval?: number;    // Key rotation ms. 0 = disabled. Default: 86400000 (24h)
  keyRotationDir?: string;         // Directory for rotated keys. Default: 'client_keys'
  keyRotationPassword?: string;    // Password for rotated key files
 })
 ```
 #### Methods
 - `generateKeys(options?: KeyGenOptions): Promise<void>`
 - `loadKeys(privateKeyPath: string, publicKeyPath?: string, password?: string): Promise<void>`
 - `fetchServerPublicKey(): Promise<string>`
 - `encryptPayload(payload: object): Promise<ArrayBuffer>`
 - `decryptResponse(encrypted: ArrayBuffer, payloadId: string): Promise<object>`
 - `sendSecureRequest(payload: object, payloadId: string, apiKey?: string): Promise<object>`
 - `dispose(): void` — zero key material and stop rotation timer
 ## 🔧 Configuration
 ### Local Development (HTTP)
 ```javascript
 const client = new SecureChatCompletion({
-  baseUrl: 'http://localhost:12434',
+  baseUrl:   'http://localhost:12435',
-  allowHttp: true  // Required for HTTP connections
+  allowHttp: true,  // required — also prints a visible warning
 });
 ```
-⚠️ **Warning**: Only use HTTP for local development. Never use in production!
+## API Reference
-### Disable Secure Memory
+### `SecureChatCompletion` — Constructor Options
-```javascript
+```typescript
-const client = new SecureChatCompletion({
+new SecureChatCompletion(config?: ChatCompletionConfig)
  baseUrl: 'https://api.nomyo.ai:12434',
  secureMemory: false  // Disable memory protection (not recommended)
 });
 ```
-## 📝 Security Best Practices
+| Option | Type | Default | Description |
 |--------|------|---------|-------------|
 | `baseUrl` | `string` | `'https://api.nomyo.ai'` | NOMYO router URL. Must be HTTPS in production. |
 | `allowHttp` | `boolean` | `false` | Allow HTTP connections. Local development only. |
 | `apiKey` | `string` | `undefined` | Bearer token for `Authorization` header. |
 | `secureMemory` | `boolean` | `true` | Zero sensitive buffers immediately after use. |
 | `timeout` | `number` | `60000` | Request timeout in milliseconds. |
 | `debug` | `boolean` | `false` | Print verbose logging to the console. |
 | `keyDir` | `string` | `'client_keys'` | Directory to load/save RSA keys on startup. |
 | `keyRotationInterval` | `number` | `86400000` | Auto-rotate keys every N ms. `0` disables rotation. |
 | `keyRotationDir` | `string` | `'client_keys'` | Directory for rotated key files. Node.js only. |
 | `keyRotationPassword` | `string` | `undefined` | Password for encrypted rotated key files. |
 | `maxRetries` | `number` | `2` | Extra retry attempts on 429/5xx/network errors. Exponential backoff (1 s, 2 s, …). |
- ✅ Always use HTTPS in production
+#### Methods
 - ✅ Use password protection for private keys (Node.js)
 - ✅ Keep private keys secure (permissions set to 600)
 - ✅ Never share your private key
 - ✅ Verify server's public key fingerprint before first use
 - ✅ Enable secure memory protection (default)
-## 🤝 Contributing
+- `create(request): Promise<ChatCompletionResponse>` — send an encrypted chat completion
 - `acreate(request): Promise<ChatCompletionResponse>` — alias for `create()`
 - `dispose(): void` — zero key material and stop the rotation timer
-Contributions are welcome! Please open issues or pull requests on the project repository.
+#### `create()` Request Fields
-## 📄 License
+All standard OpenAI fields (`model`, `messages`, `temperature`, `top_p`, `max_tokens`, `stop`, `n`, `tools`, `tool_choice`, `user`, `frequency_penalty`, `presence_penalty`, `logit_bias`) plus:
-See LICENSE file for licensing information.
+| Field | Description |
 |-------|-------------|
 | `security_tier` | `"standard"` \| `"high"` \| `"maximum"` — hardware isolation level |
 | `api_key` | Per-request API key override |
 | `base_url` | Per-request router URL override — creates a temporary client, used once, then disposed |
-## 📞 Support
+### `SecureCompletionClient` — Constructor Options
-For questions or issues, please refer to the project documentation or open an issue.
+Lower-level client. All options above apply, with these differences:
 | Option | Type | Default | Description |
 |--------|------|---------|-------------|
 | `routerUrl` | `string` | `'https://api.nomyo.ai'` | NOMYO router base URL (this option replaces `baseUrl`) |
 | `keySize` | `2048 \| 4096` | `4096` | RSA modulus length |
 #### Methods
 - `generateKeys(options?)` — generate a new RSA key pair
 - `loadKeys(privateKeyPath, publicKeyPath?, password?)` — load existing PEM files
 - `fetchServerPublicKey()` — fetch the server's RSA public key
 - `encryptPayload(payload)` — encrypt a request payload
 - `decryptResponse(encrypted, payloadId)` — decrypt a response body
 - `sendSecureRequest(payload, payloadId, apiKey?, securityTier?)` — full encrypt → POST → decrypt cycle
 - `dispose()` — zero key material and stop rotation timer
 ### Secure Memory Public API
 ```typescript
 import {
  getMemoryProtectionInfo,
  disableSecureMemory,
  enableSecureMemory,
  SecureByteContext,
 } from 'nomyo-js';
 ```
 | Export | Description |
 |--------|-------------|
 | `getMemoryProtectionInfo()` | Returns `{ method, canLock, isPlatformSecure, details? }` |
 | `disableSecureMemory()` | Disable global secure-memory zeroing |
 | `enableSecureMemory()` | Re-enable global secure-memory zeroing |
 | `SecureByteContext` | Low-level buffer wrapper — zeros in `finally` block |
 ### Error Classes
 ```typescript
 import {
  AuthenticationError, InvalidRequestError, RateLimitError,
  ForbiddenError, ServerError, ServiceUnavailableError,
  APIConnectionError, SecurityError, DisposedError, APIError,
 } from 'nomyo-js';
 ```
 | Class | HTTP | Thrown when |
 |-------|------|-------------|
 | `AuthenticationError` | 401 | Invalid or missing API key |
 | `InvalidRequestError` | 400 | Malformed request |
 | `ForbiddenError` | 403 | Model not allowed for the security tier |
 | `RateLimitError` | 429 | Rate limit exceeded (after all retries) |
 | `ServerError` | 500 | Internal server error (after all retries) |
 | `ServiceUnavailableError` | 503 | Backend unavailable (after all retries) |
 | `APIError` | varies | Other HTTP errors |
 | `APIConnectionError` | — | Network failure or timeout (after all retries) |
 | `SecurityError` | — | HTTPS not used, header injection, or crypto failure |
 | `DisposedError` | — | Method called after `dispose()` |
 ## Platform Support
 ### Node.js
 - **Minimum**: Node.js 14.17+
 - **Recommended**: Node.js 18 LTS or later
 - **Key storage**: File system (`keyDir` directory, default `client_keys/`)
 ### Browsers
 - **Supported**: Chrome 37+, Firefox 34+, Safari 11+, Edge 79+
 - **Key storage**: In-memory only (not persisted)
 - **Limitation**: File-based key operations (`keyDir`, `loadKeys`) are not available
 ## Security Best Practices
 - Always use HTTPS (`allowHttp` is `false` by default)
 - Load API key from an environment variable, never hardcode it
 - Use password-protected key files (`keyRotationPassword`)
 - Store keys outside the project directory and outside version control
 - Add `client_keys/` and `*.pem` to `.gitignore`
 - Call `dispose()` when the client is no longer needed
 - Use `security_tier: 'maximum'` for HIPAA PHI or classified data
 - Consider the `nomyo-native` addon if swap-file exposure is unacceptable
 ## License
 See LICENSE file.
--- a/doc/README.md
+++ b/doc/README.md
@ -0,0 +1,49 @@
 # NOMYO.js Documentation
 Comprehensive documentation for the NOMYO secure JavaScript/TypeScript chat client — a drop-in replacement for OpenAI's `ChatCompletion` API with end-to-end encryption.
 To use this library you need an active subscription on [NOMYO Inference](https://chat.nomyo.ai/).
 ## Quick Start
 ```javascript
 import { SecureChatCompletion } from 'nomyo-js';
 const client = new SecureChatCompletion({ apiKey: process.env.NOMYO_API_KEY });
 const response = await client.create({
  model: 'Qwen/Qwen3-0.6B',
  messages: [{ role: 'user', content: 'Hello!' }],
  security_tier: 'standard',
 });
 console.log(response.choices[0].message.content);
 ```
 ## Documentation
 1. [Installation](installation.md) — npm, CDN, and native addon setup
 2. [Getting Started](getting-started.md) — first request, auth, security tiers, error handling
 3. [API Reference](api-reference.md) — complete constructor options, methods, and types
 4. [Models](models.md) — available models and selection guidance
 5. [Security Guide](security-guide.md) — encryption architecture, best practices, and compliance
 6. [Rate Limits](rate-limits.md) — request limits, burst behaviour, and retry strategy
 7. [Examples](examples.md) — real-world scenarios, browser usage, and advanced patterns
 8. [Troubleshooting](troubleshooting.md) — common errors and their fixes
 ---
 ## Key Features
 - **End-to-end encryption** — AES-256-GCM + RSA-OAEP-4096. No plaintext ever leaves your process.
 - **OpenAI-compatible API** — `create()` / `acreate()` accept the same parameters as the OpenAI SDK.
 - **Browser + Node.js** — single package, separate entry points for each runtime.
 - **Automatic key management** — keys are generated on first use and optionally persisted to disk (Node.js).
 - **Automatic key rotation** — RSA keys rotate on a configurable interval (default 24 h) to limit fingerprint lifetime.
 - **Security tiers** — per-request routing to `standard`, `high`, or `maximum` isolation hardware.
 - **Retry with exponential backoff** — automatic retries on 429 / 5xx / network errors (configurable).
 - **Resource lifecycle** — `dispose()` immediately zeros in-memory key material and stops the rotation timer.
 ## Technical Security Docs
 For cryptographic architecture, threat model, and implementation status see [SECURITY.md](../docs/SECURITY.md).
--- a/doc/api-reference.md
+++ b/doc/api-reference.md
@ -0,0 +1,272 @@
 # API Reference
 ## `SecureChatCompletion`
 High-level OpenAI-compatible client. The recommended entry point for most use cases.
 ### Constructor
 ```typescript
 new SecureChatCompletion(config?: ChatCompletionConfig)
 ```
 #### `ChatCompletionConfig`
 | Option                | Type      | Default                  | Description                                                                                                                                                               |
 | ----------------------- | ----------- | -------------------------- | --------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
 | `baseUrl`             | `string`  | `'https://api.nomyo.ai'` | NOMYO router URL. Must be HTTPS in production.                                                                                                                            |
 | `allowHttp`           | `boolean` | `false`                  | Allow HTTP connections. **Local development only.**                                                                                                                       |
 | `apiKey`              | `string`  | `undefined`              | Bearer token sent in the `Authorization` header.                                                                                                                          |
 | `secureMemory`        | `boolean` | `true`                   | Enable immediate zeroing of sensitive buffers after use.                                                                                                                  |
 | `timeout`             | `number`  | `60000`                  | Request timeout in milliseconds.                                                                                                                                          |
 | `debug`               | `boolean` | `false`                  | Print verbose logging to the console.                                                                                                                                     |
 | `keyDir`              | `string`  | `'client_keys'`          | Directory to load/save RSA keys on startup. If the directory contains an existing key pair it is loaded; otherwise a new pair is generated and saved there. Node.js only. |
 | `keyRotationInterval` | `number`  | `86400000` (24 h)        | Auto-rotate RSA keys every N milliseconds. Set to `0` to disable.                                                                                                         |
 | `keyRotationDir`      | `string`  | `'client_keys'`          | Directory where rotated key files are saved. Node.js only.                                                                                                                |
 | `keyRotationPassword` | `string`  | `undefined`              | Password used to encrypt rotated key files.                                                                                                                               |
 | `maxRetries`          | `number`  | `2`                      | Maximum extra attempts on retryable errors (429, 500, 502, 503, 504, network errors). Uses exponential backoff (1 s, 2 s, …). Set to `0` to disable retries.             |
 ### Methods
 #### `create(request): Promise<ChatCompletionResponse>`
 Send an encrypted chat completion request. Returns the decrypted response.
 ```typescript
 async create(request: ChatCompletionRequest): Promise<ChatCompletionResponse>
 ```
 **`ChatCompletionRequest` fields:**
 | Field               | Type                     | Description                                                                                                                    |
 | --------------------- | -------------------------- | -------------------------------------------------------------------------------------------------------------------------------- |
 | `model`             | `string`                 | **Required.** Model ID (see [Models](models.md)).                                                                              |
 | `messages`          | `Message[]`              | **Required.** Conversation history.                                                                                            |
 | `temperature`       | `number`                 | Sampling temperature (0–2).                                                                                                   |
 | `top_p`             | `number`                 | Nucleus sampling.                                                                                                              |
 | `max_tokens`        | `number`                 | Maximum tokens to generate.                                                                                                    |
 | `stop`              | `string \| string[]`     | Stop sequences.                                                                                                                |
 | `n`                 | `number`                 | Number of completions to generate.                                                                                             |
 | `stream`            | `boolean`                | Ignored server-side (encryption requires full response).                                                                       |
 | `presence_penalty`  | `number`                 | Presence penalty (−2.0–2.0).                                                                                                 |
 | `frequency_penalty` | `number`                 | Frequency penalty (−2.0–2.0).                                                                                                |
 | `logit_bias`        | `Record<string, number>` | Token bias map.                                                                                                                |
 | `user`              | `string`                 | End-user identifier (passed through).                                                                                          |
 | `tools`             | `Tool[]`                 | Tool/function definitions.                                                                                                     |
 | `tool_choice`       | `ToolChoice`             | Tool selection strategy (`"auto"`, `"none"`, `"required"`, or specific tool).                                                  |
 | `security_tier`     | `string`                 | NOMYO-specific. `"standard"` \| `"high"` \| `"maximum"`. Not encrypted into the payload.                                       |
 | `api_key`           | `string`                 | NOMYO-specific. Per-request API key override. Not encrypted into the payload.                                                  |
 | `base_url`          | `string`                 | NOMYO-specific. Per-request router URL override. Creates a temporary client for this one call. Not encrypted into the payload. |
 **Response shape (`ChatCompletionResponse`):**
 ```typescript
 {
  id: string;
  object: 'chat.completion';
  created: number;
  model: string;
  choices: Array<{
    index: number;
    message: {
      role: string;
      content: string;
      tool_calls?: ToolCall[];       // present if tools were invoked
      reasoning_content?: string;   // chain-of-thought (Qwen3, DeepSeek-R1, etc.)
    };
    finish_reason: 'stop' | 'length' | 'tool_calls' | 'content_filter' | null;
  }>;
  usage?: {
    prompt_tokens: number;
    completion_tokens: number;
    total_tokens: number;
  };
  _metadata?: {
    payload_id: string;           // echoes the X-Payload-ID sent with the request
    processed_at: number;         // Unix timestamp of server-side processing
    is_encrypted: boolean;        // always true for this endpoint
    encryption_algorithm: string; // e.g. "hybrid-aes256-rsa4096"
    response_status: string;      // "success" on success
    security_tier?: string;       // active tier used by the server
    memory_protection?: {
      platform: string;
      memory_locking: boolean;
      secure_zeroing: boolean;
      core_dump_prevention: boolean;
    };
    cuda_device?: {
      available: boolean;
      device_hash: string;        // SHA-256 of device name (not the raw name)
    };
  };
 }
 ```
 #### `acreate(request): Promise<ChatCompletionResponse>`
 Alias for `create()`. Provided for code that follows the OpenAI SDK naming convention.
 #### `dispose(): void`
 Stop the key-rotation timer and sever in-memory RSA key references so they can be garbage-collected. After calling `dispose()`, all methods throw `DisposedError`.
 ```javascript
 client.dispose();
 ```
 ---
 ## `SecureCompletionClient`
 Lower-level client that exposes key management and individual encryption/decryption operations.
 Use this when you need fine-grained control; for most use cases prefer `SecureChatCompletion`.
 ### Constructor
 ```typescript
 new SecureCompletionClient(config?: ClientConfig)
 ```
 #### `ClientConfig`
 All options from `ChatCompletionConfig`, plus:
 | Option      | Type          | Default                        | Description                                                   |
 | ------------- | --------------- | -------------------------------- | --------------------------------------------------------------- |
 | `routerUrl` | `string`      | `'https://api.nomyo.ai'`       | NOMYO router base URL.                                        |
 | `keySize`   | `2048 \| 4096` | `4096`                         | RSA modulus length. 2048 is accepted but 4096 is recommended. |
 (`baseUrl` is renamed to `routerUrl` at this level; all other options are identical.)
 ### Methods
 #### `generateKeys(options?): Promise<void>`
 Generate a fresh RSA key pair.
 ```typescript
 await client.generateKeys({
  keySize?: 2048 | 4096,     // default: 4096
  saveToFile?: boolean,      // default: false
  keyDir?: string,           // default: 'client_keys'
  password?: string,         // minimum 8 characters if provided
 });
 ```
 #### `loadKeys(privateKeyPath, publicKeyPath?, password?): Promise<void>`
 Load an existing key pair from PEM files. Node.js only.
 ```typescript
 await client.loadKeys(
  'client_keys/private_key.pem',
  'client_keys/public_key.pem',  // optional; derived from private key path if omitted
  'your-password'                // required if private key is encrypted
 );
 ```
 #### `fetchServerPublicKey(): Promise<string>`
 Fetch the server's RSA public key from `/pki/public_key` over HTTPS. Called automatically on every encryption; exposed for diagnostics.
 #### `encryptPayload(payload): Promise<ArrayBuffer>`
 Encrypt a request payload. Returns the encrypted binary package ready to POST.
 #### `decryptResponse(encrypted, payloadId): Promise<object>`
 Decrypt a response body received from the secure endpoint.
 #### `sendSecureRequest(payload, payloadId, apiKey?, securityTier?): Promise<object>`
 Full encrypt → POST → decrypt cycle with retry logic. Called internally by `SecureChatCompletion.create()`.
 #### `dispose(): void`
 Same as `SecureChatCompletion.dispose()`.
 ---
 ## Secure Memory API
 ```typescript
 import {
  getMemoryProtectionInfo,
  disableSecureMemory,
  enableSecureMemory,
  SecureByteContext,
 } from 'nomyo-js';
 ```
 ### `getMemoryProtectionInfo(): ProtectionInfo`
 Returns information about the memory protection available on the current platform:
 ```typescript
 interface ProtectionInfo {
  canLock: boolean;       // true if mlock is available (requires native addon)
  isPlatformSecure: boolean;
  method: 'mlock' | 'zero-only' | 'none';
  details?: string;
 }
 ```
 ### `disableSecureMemory(): void`
 Disable secure-memory zeroing globally. Affects new `SecureByteContext` instances that do not pass an explicit `useSecure` argument. Existing client instances are unaffected (they pass `useSecure` explicitly).
 ### `enableSecureMemory(): void`
 Re-enable secure memory operations globally.
 ### `SecureByteContext`
 Low-level context manager that zeros an `ArrayBuffer` in a `finally` block even if an exception occurs. Analogous to Python's `secure_bytearray()` context manager.
 ```typescript
 const context = new SecureByteContext(sensitiveBuffer);
 const result = await context.use(async (data) => {
  return doSomethingWith(data);
 });
 // sensitiveBuffer is zeroed here regardless of whether doSomethingWith threw
 ```
 ---
 ## Error Classes
 All errors are exported from the package root.
 ```typescript
 import {
  APIError,
  AuthenticationError,
  InvalidRequestError,
  RateLimitError,
  ForbiddenError,
  ServerError,
  ServiceUnavailableError,
  APIConnectionError,
  SecurityError,
  DisposedError,
 } from 'nomyo-js';
 ```
 | Class                     | HTTP status | Thrown when                                                  |
 | --------------------------- | ------------- | -------------------------------------------------------------- |
 | `AuthenticationError`     | 401         | Invalid or missing API key                                   |
 | `InvalidRequestError`     | 400         | Malformed request (e.g. streaming requested)                 |
 | `ForbiddenError`          | 403         | Model not allowed for the requested security tier            |
 | `RateLimitError`          | 429         | Rate limit exceeded (after all retries exhausted)            |
 | `ServerError`             | 500         | Internal server error (after all retries exhausted)          |
 | `ServiceUnavailableError` | 503         | Inference backend unavailable (after all retries exhausted)  |
 | `APIError`                | varies      | Other HTTP errors (404, 502, 504, etc.)                      |
 | `APIConnectionError`      | —          | Network failure or timeout (after all retries exhausted)     |
 | `SecurityError`           | —          | HTTPS not used, header injection detected, or crypto failure |
 | `DisposedError`           | —          | Method called after `dispose()`                              |
 All errors that extend `APIError` expose `statusCode?: number` and `errorDetails?: object`.
--- a/doc/examples.md
+++ b/doc/examples.md
@ -0,0 +1,437 @@
 # Examples
 ## Basic Usage
 ### Simple Chat
 ```javascript
 import { SecureChatCompletion } from 'nomyo-js';
 const client = new SecureChatCompletion({ apiKey: process.env.NOMYO_API_KEY });
 const response = await client.create({
  model: 'Qwen/Qwen3-0.6B',
  messages: [{ role: 'user', content: 'Hello, how are you?' }],
  temperature: 0.7,
 });
 console.log(response.choices[0].message.content);
 client.dispose();
 ```
 ### Chat with System Message
 ```javascript
 const response = await client.create({
  model: 'Qwen/Qwen3-0.6B',
  messages: [
    { role: 'system', content: 'You are a concise technical assistant.' },
    { role: 'user',   content: 'What is the capital of France?' },
  ],
  temperature: 0.7,
 });
 console.log(response.choices[0].message.content);
 ```
 ---
 ## Security Tiers
 ```javascript
 // Standard — general use (GPU)
 const r1 = await client.create({
  model: 'Qwen/Qwen3-0.6B',
  messages: [{ role: 'user', content: 'General query' }],
  security_tier: 'standard',
 });
 // High — sensitive business data
 const r2 = await client.create({
  model: 'Qwen/Qwen3-0.6B',
  messages: [{ role: 'user', content: 'Review this contract clause...' }],
  security_tier: 'high',
 });
 // Maximum — HIPAA PHI / classified data (CPU-only)
 const r3 = await client.create({
  model: 'Qwen/Qwen3-0.6B',
  messages: [{ role: 'user', content: 'Patient record summary...' }],
  security_tier: 'maximum',
 });
 ```
 ---
 ## Tool / Function Calling
 ```javascript
 const response = await client.create({
  model: 'Qwen/Qwen3-0.6B',
  messages: [{ role: 'user', content: "What's the weather in Paris?" }],
  tools: [
    {
      type: 'function',
      function: {
        name: 'get_weather',
        description: 'Get weather information for a location',
        parameters: {
          type: 'object',
          properties: {
            location: { type: 'string', description: 'City name' },
          },
          required: ['location'],
        },
      },
    },
  ],
  tool_choice: 'auto',
 });
 const message = response.choices[0].message;
 if (message.tool_calls?.length) {
  const call = message.tool_calls[0];
  const args = JSON.parse(call.function.arguments);
  console.log(`Call ${call.function.name}(location="${args.location}")`);
  // → Call get_weather(location="Paris")
 }
 ```
 ---
 ## Error Handling
 ```javascript
 import {
  SecureChatCompletion,
  AuthenticationError,
  RateLimitError,
  ForbiddenError,
  InvalidRequestError,
  ServerError,
  ServiceUnavailableError,
  APIConnectionError,
  SecurityError,
 } from 'nomyo-js';
 const client = new SecureChatCompletion({ apiKey: process.env.NOMYO_API_KEY });
 try {
  const response = await client.create({
    model: 'Qwen/Qwen3-0.6B',
    messages: [{ role: 'user', content: 'Hello' }],
  });
  console.log(response.choices[0].message.content);
 } catch (err) {
  if (err instanceof AuthenticationError) {
    console.error('Check your API key:', err.message);
  } else if (err instanceof RateLimitError) {
    console.error('Rate limit hit after all retries:', err.message);
  } else if (err instanceof ForbiddenError) {
    console.error('Model not allowed for this security tier:', err.message);
  } else if (err instanceof InvalidRequestError) {
    console.error('Bad request:', err.message, err.errorDetails);
  } else if (err instanceof ServerError || err instanceof ServiceUnavailableError) {
    console.error('Server error after retries:', err.message);
  } else if (err instanceof APIConnectionError) {
    console.error('Network error after retries:', err.message);
  } else if (err instanceof SecurityError) {
    console.error('Security/crypto failure:', err.message);
  } else {
    throw err;
  }
 }
 ```
 ---
 ## Real-World Scenarios
 ### Chat Application with History
 ```javascript
 import { SecureChatCompletion } from 'nomyo-js';
 class SecureChatApp {
  constructor(apiKey) {
    this.client = new SecureChatCompletion({ apiKey });
    this.history = [];
  }
  async chat(userMessage) {
    this.history.push({ role: 'user', content: userMessage });
    const response = await this.client.create({
      model: 'Qwen/Qwen3-0.6B',
      messages: this.history,
      temperature: 0.7,
    });
    const assistantMessage = response.choices[0].message;
    this.history.push({ role: assistantMessage.role, content: assistantMessage.content });
    return assistantMessage.content;
  }
  dispose() {
    this.client.dispose();
  }
 }
 const app = new SecureChatApp(process.env.NOMYO_API_KEY);
 const r1 = await app.chat("What's your name?");
 console.log('Assistant:', r1);
 const r2 = await app.chat('What did I just ask you?');
 console.log('Assistant:', r2);
 app.dispose();
 ```
 ### Per-Request Base URL Override
 For multi-tenant setups or testing against different router instances from a single client:
 ```javascript
 const client = new SecureChatCompletion({
  baseUrl: 'https://primary.nomyo.ai:12435',
  apiKey: process.env.NOMYO_API_KEY,
 });
 // This single request goes to a different router; a temporary client is
 // created, used, and disposed automatically — the main client is unchanged
 const response = await client.create({
  model: 'Qwen/Qwen3-0.6B',
  messages: [{ role: 'user', content: 'Hello from secondary router' }],
  base_url: 'https://secondary.nomyo.ai:12435',
 });
 ```
 ### Environment-Based Configuration
 ```javascript
 import 'dotenv/config';
 import { SecureChatCompletion } from 'nomyo-js';
 const client = new SecureChatCompletion({
  baseUrl:    process.env.NOMYO_SERVER_URL ?? 'https://api.nomyo.ai',
  apiKey:     process.env.NOMYO_API_KEY,
  keyDir:     process.env.NOMYO_KEY_DIR   ?? 'client_keys',
  maxRetries: Number(process.env.NOMYO_MAX_RETRIES ?? '2'),
  debug:      process.env.NODE_ENV === 'development',
 });
 ```
 ---
 ## Batch Processing
 ### Sequential (Rate-Limit-Safe)
 ```javascript
 const queries = [
  'Summarise document A',
  'Summarise document B',
  'Summarise document C',
 ];
 const summaries = [];
 for (const query of queries) {
  const response = await client.create({
    model: 'Qwen/Qwen3-0.6B',
    messages: [{ role: 'user', content: query }],
  });
  summaries.push(response.choices[0].message.content);
  // Optional: add a small delay to stay within rate limits
  await new Promise(r => setTimeout(r, 600));
 }
 ```
 ### Concurrent (With Throttling)
 ```javascript
 // Process in batches of 2 (the default rate limit)
 async function batchN(items, batchSize, fn) {
  const results = [];
  for (let i = 0; i < items.length; i += batchSize) {
    const batch = items.slice(i, i + batchSize);
    const batchResults = await Promise.all(batch.map(fn));
    results.push(...batchResults);
    if (i + batchSize < items.length) {
      await new Promise(r => setTimeout(r, 1100)); // wait >1 s between batches
    }
  }
  return results;
 }
 const summaries = await batchN(documents, 2, async (doc) => {
  const response = await client.create({
    model: 'Qwen/Qwen3-0.6B',
    messages: [{ role: 'user', content: `Summarise: ${doc}` }],
  });
  return response.choices[0].message.content;
 });
 ```
 ---
 ## Thinking Models
 ```javascript
 const response = await client.create({
  model: 'LiquidAI/LFM2.5-1.2B-Thinking',
  messages: [{ role: 'user', content: 'Is 9.9 larger than 9.11?' }],
 });
 const { content, reasoning_content } = response.choices[0].message;
 console.log('Reasoning:', reasoning_content);   // internal chain-of-thought
 console.log('Answer:',    content);             // final answer to the user
 ```
 ---
 ## Browser Usage
 ```html
 <!DOCTYPE html>
 <html>
 <head>
  <title>NOMYO Secure Chat</title>
 </head>
 <body>
  <textarea id="input" placeholder="Ask something..."></textarea>
  <button id="send">Send</button>
  <div id="output"></div>
  <script type="module">
    import { SecureChatCompletion } from 'https://unpkg.com/nomyo-js/dist/browser/index.js';
    // In production, proxy through your backend instead of exposing the API key
    const client = new SecureChatCompletion({
      baseUrl: 'https://api.nomyo.ai',
      apiKey:  'your-api-key',        // see note above
    });
    document.getElementById('send').addEventListener('click', async () => {
      const text = document.getElementById('input').value.trim();
      if (!text) return;
      document.getElementById('output').textContent = 'Thinking...';
      try {
        const response = await client.create({
          model: 'Qwen/Qwen3-0.6B',
          messages: [{ role: 'user', content: text }],
        });
        document.getElementById('output').textContent =
          response.choices[0].message.content;
      } catch (err) {
        document.getElementById('output').textContent = `Error: ${err.message}`;
      }
    });
  </script>
 </body>
 </html>
 ```
 ---
 ## Advanced Key Management
 ### Custom Key Directory
 ```javascript
 const client = new SecureChatCompletion({
  apiKey:  process.env.NOMYO_API_KEY,
  keyDir:  '/var/lib/myapp/nomyo-keys',   // outside project directory
  keyRotationDir:      '/var/lib/myapp/nomyo-keys',
  keyRotationPassword: process.env.NOMYO_KEY_PASSWORD,
 });
 ```
 ### Generating Keys Manually
 ```javascript
 import { SecureCompletionClient } from 'nomyo-js';
 const client = new SecureCompletionClient({
  routerUrl: 'https://api.nomyo.ai',
 });
 // Generate a new 4096-bit key pair and save it with password protection
 await client.generateKeys({
  saveToFile: true,
  keyDir:     'client_keys',
  password:   process.env.NOMYO_KEY_PASSWORD,
 });
 console.log('Keys generated and saved to client_keys/');
 client.dispose();
 ```
 ### Loading Keys Explicitly
 ```javascript
 import { SecureCompletionClient } from 'nomyo-js';
 const client = new SecureCompletionClient({ routerUrl: 'https://api.nomyo.ai' });
 await client.loadKeys(
  'client_keys/private_key.pem',
  'client_keys/public_key.pem',
  process.env.NOMYO_KEY_PASSWORD
 );
 // Now send requests using the loaded keys
 const result = await client.sendSecureRequest(
  { model: 'Qwen/Qwen3-0.6B', messages: [{ role: 'user', content: 'Hello' }] },
  crypto.randomUUID()
 );
 client.dispose();
 ```
 ---
 ## Inspecting Memory Protection
 ```javascript
 import { getMemoryProtectionInfo } from 'nomyo-js';
 const info = getMemoryProtectionInfo();
 console.log(`Memory method: ${info.method}`);        // 'zero-only' or 'mlock'
 console.log(`Can lock:      ${info.canLock}`);       // true if native addon present
 console.log(`Details:       ${info.details}`);
 ```
 ---
 ## TypeScript
 Full type safety out of the box:
 ```typescript
 import {
  SecureChatCompletion,
  ChatCompletionRequest,
  ChatCompletionResponse,
  Message,
 } from 'nomyo-js';
 const client = new SecureChatCompletion({ apiKey: process.env.NOMYO_API_KEY! });
 const messages: Message[] = [
  { role: 'user', content: 'Hello!' },
 ];
 const request: ChatCompletionRequest = {
  model: 'Qwen/Qwen3-0.6B',
  messages,
  temperature: 0.7,
 };
 const response: ChatCompletionResponse = await client.create(request);
 const content = response.choices[0].message.content;
 client.dispose();
 ```
--- a/doc/getting-started.md
+++ b/doc/getting-started.md
@ -0,0 +1,279 @@
 # Getting Started
 ## Overview
 NOMYO.js provides end-to-end encryption for all communication between your application and NOMYO inference endpoints. Your prompts and responses are encrypted before leaving your process and decrypted only after they arrive back — the server never sees plaintext.
 The API mirrors OpenAI's `ChatCompletion`, making it easy to integrate into existing code.
 > **Note on streaming:** The API is non-streaming. Setting `stream: true` in a request is ignored server-side to maintain full response encryption.
 ---
 ## Simple Chat Completion
 ```javascript
 import { SecureChatCompletion } from 'nomyo-js';
 const client = new SecureChatCompletion({
  apiKey: process.env.NOMYO_API_KEY,
 });
 const response = await client.create({
  model: 'Qwen/Qwen3-0.6B',
  messages: [{ role: 'user', content: 'Hello! How are you today?' }],
  temperature: 0.7,
 });
 // Extract what you need, then let the response go out of scope promptly.
 // This minimises the time decrypted data lives in process memory
 // (reduces exposure from swap files, core dumps, or memory inspection).
 const reply = response.choices[0].message.content;
 console.log(reply);
 ```
 ### With a System Message
 ```javascript
 const response = await client.create({
  model: 'Qwen/Qwen3-0.6B',
  messages: [
    { role: 'system', content: 'You are a helpful assistant.' },
    { role: 'user',   content: 'What is the capital of France?' },
  ],
  temperature: 0.7,
 });
 console.log(response.choices[0].message.content);
 ```
 ---
 ## API Key Authentication
 ```javascript
 // Constructor-level key (used for all requests from this instance)
 const client = new SecureChatCompletion({
  apiKey: process.env.NOMYO_API_KEY,
 });
 // Per-request key override (takes precedence over constructor key)
 const response = await client.create({
  model: 'Qwen/Qwen3-0.6B',
  messages: [{ role: 'user', content: 'Hello!' }],
  api_key: 'override-key-for-this-request',
 });
 ```
 ---
 ## Security Tiers
 Pass `security_tier` in the request to control hardware routing and isolation level:
 | Tier         | Use case                                              |
 | -------------- | ------------------------------------------------------- |
 | `"standard"` | General secure inference (GPU)                        |
 | `"high"`     | Sensitive business data — enforces secure tokenizer  |
 | `"maximum"`  | HIPAA PHI, classified data — E2EE, maximum isolation |
 ```javascript
 // Standard — general use
 const response = await client.create({
  model: 'Qwen/Qwen3-0.6B',
  messages: [{ role: 'user', content: 'General query' }],
  security_tier: 'standard',
 });
 // High — sensitive business data
 const response2 = await client.create({
  model: 'Qwen/Qwen3-0.6B',
  messages: [{ role: 'user', content: 'Summarise this contract clause...' }],
  security_tier: 'high',
 });
 // Maximum — PHI / classified data
 const response3 = await client.create({
  model: 'Qwen/Qwen3-0.6B',
  messages: [{ role: 'user', content: 'Patient record summary...' }],
  security_tier: 'maximum',
 });
 ```
 > Using `"high"` or `"maximum"` adds latency vs `"standard"` due to additional isolation measures.
 ---
 ## Using Tools (Function Calling)
 ```javascript
 const response = await client.create({
  model: 'Qwen/Qwen3-0.6B',
  messages: [{ role: 'user', content: "What's the weather in Paris?" }],
  tools: [
    {
      type: 'function',
      function: {
        name: 'get_weather',
        description: 'Get weather information for a location',
        parameters: {
          type: 'object',
          properties: {
            location: { type: 'string', description: 'City name' },
          },
          required: ['location'],
        },
      },
    },
  ],
  tool_choice: 'auto',
  temperature: 0.7,
 });
 const message = response.choices[0].message;
 if (message.tool_calls) {
  const call = message.tool_calls[0];
  console.log('Tool called:', call.function.name);
  console.log('Arguments:', call.function.arguments);
 }
 ```
 ---
 ## Error Handling
 Import typed error classes to distinguish failure modes:
 ```javascript
 import {
  SecureChatCompletion,
  AuthenticationError,
  RateLimitError,
  InvalidRequestError,
  ForbiddenError,
  ServerError,
  ServiceUnavailableError,
  APIConnectionError,
  SecurityError,
 } from 'nomyo-js';
 const client = new SecureChatCompletion({ apiKey: process.env.NOMYO_API_KEY });
 try {
  const response = await client.create({
    model: 'Qwen/Qwen3-0.6B',
    messages: [{ role: 'user', content: 'Hello!' }],
  });
  console.log(response.choices[0].message.content);
 } catch (err) {
  if (err instanceof AuthenticationError) {
    console.error('Bad API key:', err.message);
  } else if (err instanceof RateLimitError) {
    // The client already retried automatically (default: 2 retries).
    // If you reach here, all retries were exhausted.
    console.error('Rate limit exceeded after retries:', err.message);
  } else if (err instanceof ForbiddenError) {
    // Model not allowed for the requested security_tier
    console.error('Forbidden:', err.message);
  } else if (err instanceof InvalidRequestError) {
    console.error('Bad request:', err.message);
  } else if (err instanceof ServerError || err instanceof ServiceUnavailableError) {
    console.error('Server error (retries exhausted):', err.message);
  } else if (err instanceof APIConnectionError) {
    console.error('Network error (retries exhausted):', err.message);
  } else if (err instanceof SecurityError) {
    console.error('Encryption/decryption failure:', err.message);
  } else {
    throw err;   // re-throw unexpected errors
  }
 }
 ```
 All typed errors expose:
 - `message: string` — human-readable description
 - `statusCode?: number` — HTTP status (where applicable)
 - `errorDetails?: object` — raw response body (where applicable)
 ---
 ## Resource Management
 Always call `dispose()` when you're done with a client to stop the background key-rotation timer and zero in-memory key material:
 ```javascript
 const client = new SecureChatCompletion({ apiKey: process.env.NOMYO_API_KEY });
 try {
  const response = await client.create({ ... });
  console.log(response.choices[0].message.content);
 } finally {
  client.dispose();
 }
 ```
 For long-running servers (HTTP handlers, daemons), create one client instance and reuse it — don't create a new one per request.
 ---
 ## `acreate()` Alias
 `acreate()` is a direct alias for `create()` provided for code that follows the OpenAI naming convention:
 ```javascript
 const response = await client.acreate({
  model: 'Qwen/Qwen3-0.6B',
  messages: [{ role: 'user', content: 'Hello!' }],
 });
 ```
 ---
 ## Browser Usage
 In browsers, keys are kept in memory only (no file system). Everything else is identical to Node.js.
 ```html
 <script type="module">
  import { SecureChatCompletion } from 'https://unpkg.com/nomyo-js/dist/browser/index.js';
  const client = new SecureChatCompletion({
    baseUrl: 'https://api.nomyo.ai',
    apiKey: 'your-api-key',
  });
  const response = await client.create({
    model: 'Qwen/Qwen3-0.6B',
    messages: [{ role: 'user', content: 'Hello from the browser!' }],
  });
  console.log(response.choices[0].message.content);
  client.dispose();
 </script>
 ```
 > **Security note:** Embedding API keys in browser-side code exposes them to end users. In a real application, proxy requests through your backend or use short-lived tokens.
 ---
 ## Local Development (HTTP)
 For a local NOMYO router running over plain HTTP:
 ```javascript
 const client = new SecureChatCompletion({
  baseUrl: 'http://localhost:12435',
  allowHttp: true,   // required; prints a security warning
 });
 ```
 Never use `allowHttp: true` in production.
--- a/doc/installation.md
+++ b/doc/installation.md
@ -0,0 +1,107 @@
 # Installation
 ## Prerequisites
 - **Node.js**: 14.17 or higher (18 LTS recommended)
 - **npm** / **yarn** / **pnpm**
 - For TypeScript projects: TypeScript 4.7+
 ## Install from npm
 ```bash
 npm install nomyo-js
 ```
 ```bash
 yarn add nomyo-js
 ```
 ```bash
 pnpm add nomyo-js
 ```
 ## Browser (CDN)
 ```html
 <script type="module">
  import { SecureChatCompletion } from 'https://unpkg.com/nomyo-js/dist/browser/index.js';
  // ...
 </script>
 ```
 ---
 ## Verify Installation
 ### Node.js
 ```javascript
 import { SecureChatCompletion, getMemoryProtectionInfo } from 'nomyo-js';
 const info = getMemoryProtectionInfo();
 console.log('Memory protection:', info.method);   // e.g. "zero-only"
 console.log('Can lock:', info.canLock);           // true if native addon present
 const client = new SecureChatCompletion({ apiKey: 'test' });
 console.log('nomyo-js installed successfully');
 client.dispose();
 ```
 ### CommonJS
 ```javascript
 const { SecureChatCompletion } = require('nomyo-js');
 ```
 ## Optional: Native Memory Addon
 The pure-JS implementation zeroes buffers immediately after use but cannot prevent the OS from paging sensitive data to swap.
 The optional native addon adds `mlock`/`VirtualLock` support for true OS-level memory locking.
 ```bash
 cd node_modules/nomyo-js/native
 npm install
 npm run build
 ```
 Alternatively, install the addon as the standalone `nomyo-native` package:
 ```bash
 npm install nomyo-native
 ```
 When the addon is present `getMemoryProtectionInfo()` will report `method: 'mlock'` and `canLock: true`.
 ## TypeScript
 All public APIs ship with bundled type declarations — no `@types/` package required.
 ```typescript
 import {
  SecureChatCompletion,
  ChatCompletionRequest,
  ChatCompletionResponse,
  getMemoryProtectionInfo,
 } from 'nomyo-js';
 ```
 ## Environment Variables
 Store secrets outside source code:
 ```bash
 # .env (never commit this file)
 NOMYO_API_KEY=your-api-key
 NOMYO_SERVER_URL=https://api.nomyo.ai
 NOMYO_KEY_PASSWORD=your-key-password
 ```
 ```javascript
 import 'dotenv/config';   // or use process.env directly
 import { SecureChatCompletion } from 'nomyo-js';
 const client = new SecureChatCompletion({
  baseUrl: process.env.NOMYO_SERVER_URL,
  apiKey:  process.env.NOMYO_API_KEY,
 });
 ```
--- a/doc/models.md
+++ b/doc/models.md
@ -0,0 +1,85 @@
 # Available Models
 All models are available via `api.nomyo.ai`. Pass the model ID string directly to the `model` field of `create()`.
 ## Model List
 | Model ID | Parameters | Type | Notes |
 |---|---|---|---|
 | `Qwen/Qwen3-0.6B` | 0.6B | General | Lightweight, fast inference |
 | `Qwen/Qwen3.5-0.8B` | 0.8B | General | Lightweight, fast inference |
 | `LiquidAI/LFM2.5-1.2B-Thinking` | 1.2B | Thinking | Reasoning model |
 | `ibm-granite/granite-4.0-h-small` | Small | General | IBM Granite 4.0, enterprise-focused |
 | `Qwen/Qwen3.5-9B` | 9B | General | Balanced quality and speed |
 | `utter-project/EuroLLM-9B-Instruct-2512` | 9B | General | Multilingual, strong European language support |
 | `zai-org/GLM-4.7-Flash` | — | General | Fast GLM variant |
 | `mistralai/Ministral-3-14B-Instruct-2512-GGUF` | 14B | General | Mistral instruction-tuned |
 | `ServiceNow-AI/Apriel-1.6-15b-Thinker` | 15B | Thinking | Reasoning model |
 | `openai/gpt-oss-20b` | 20B | General | OpenAI open-weight release |
 | `LiquidAI/LFM2-24B-A2B` | 24B (2B active) | General | MoE — efficient inference |
 | `Qwen/Qwen3.5-27B` | 27B | General | High quality, large context |
 | `google/medgemma-27b-it` | 27B | Specialized | Medical domain, instruction-tuned |
 | `nvidia/NVIDIA-Nemotron-3-Nano-30B-A3B-NVFP4` | 30B (3B active) | General | MoE — efficient inference |
 | `Qwen/Qwen3.5-35B-A3B` | 35B (3B active) | General | MoE — efficient inference |
 | `moonshotai/Kimi-Linear-48B-A3B-Instruct` | 48B (3B active) | General | MoE — large capacity, efficient inference |
 > **MoE** (Mixture of Experts) models show total/active parameter counts. Only active parameters are used per token, keeping inference cost low relative to total model size.
 ## Usage
 ```javascript
 import { SecureChatCompletion } from 'nomyo-js';
 const client = new SecureChatCompletion({ apiKey: process.env.NOMYO_API_KEY });
 const response = await client.create({
  model: 'Qwen/Qwen3.5-9B',
  messages: [{ role: 'user', content: 'Hello!' }],
 });
 ```
 ## Choosing a Model
 | Goal | Recommended models |
 |------|--------------------|
 | **Low latency / edge** | `Qwen/Qwen3-0.6B`, `Qwen/Qwen3.5-0.8B`, `LiquidAI/LFM2.5-1.2B-Thinking` |
 | **Balanced quality + speed** | `Qwen/Qwen3.5-9B`, `mistralai/Ministral-3-14B-Instruct-2512-GGUF` |
 | **Reasoning / chain-of-thought** | `LiquidAI/LFM2.5-1.2B-Thinking`, `ServiceNow-AI/Apriel-1.6-15b-Thinker` |
 | **Multilingual** | `utter-project/EuroLLM-9B-Instruct-2512` |
 | **Medical** | `google/medgemma-27b-it` |
 | **Highest quality** | `moonshotai/Kimi-Linear-48B-A3B-Instruct`, `Qwen/Qwen3.5-35B-A3B` |
 ## Thinking Models
 Models marked **Thinking** return an additional `reasoning_content` field in the response message alongside the normal `content`. This contains the model's internal chain-of-thought:
 ```javascript
 const response = await client.create({
  model: 'LiquidAI/LFM2.5-1.2B-Thinking',
  messages: [{ role: 'user', content: 'Is 9.9 or 9.11 larger?' }],
 });
 const { content, reasoning_content } = response.choices[0].message;
 console.log('Reasoning:', reasoning_content); // internal chain-of-thought
 console.log('Answer:', content);              // final answer
 ```
 ## Security Tier Compatibility
 Not all models are available on all security tiers. If a model is not permitted for the requested tier, the server returns HTTP 403 and the client throws `ForbiddenError`.
 ```javascript
 import { ForbiddenError } from 'nomyo-js';
 try {
  const response = await client.create({
    model: 'Qwen/Qwen3.5-27B',
    messages: [{ role: 'user', content: '...' }],
    security_tier: 'maximum',
  });
 } catch (err) {
  if (err instanceof ForbiddenError) {
    // Model not available at this security tier — retry with a different tier or model
  }
 }
 ```
--- a/doc/rate-limits.md
+++ b/doc/rate-limits.md
@ -0,0 +1,115 @@
 # Rate Limits
 The NOMYO API (`api.nomyo.ai`) enforces rate limits to ensure fair usage and service stability for all users.
 ## Default Rate Limit
 By default, each API key is limited to **2 requests per second**.
 ## Burst Allowance
 Short bursts above the default limit are permitted. You may send up to **4 requests per second** in burst mode, provided you have not exceeded burst usage within the current **10-second window**.
 Burst capacity is granted once per 10-second window. If you consume the burst allowance, you must wait for the window to reset before burst is available again.
 ## Rate Limit Summary
 | Mode | Limit | Condition |
 |------|-------|-----------|
 | Default | 2 requests/second | Always active |
 | Burst | 4 requests/second | Once per 10-second window |
 ## Error Responses
 ### 429 Too Many Requests
 Returned when your request rate exceeds the allowed limit.
 The client retries automatically (see below). If all retries are exhausted, `RateLimitError` is thrown:
 ```javascript
 import { SecureChatCompletion, RateLimitError } from 'nomyo-js';
 try {
  const response = await client.create({ ... });
 } catch (err) {
  if (err instanceof RateLimitError) {
    // All retries exhausted — back off manually before trying again
    console.error('Rate limit exceeded:', err.message);
  }
 }
 ```
 ### 503 Service Unavailable (Cool-down)
 Returned when burst limits are abused repeatedly. A **30-minute cool-down** is applied to the offending API key.
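 **What to do:** Wait 30 minutes before retrying. Review your request patterns to ensure you stay within the permitted limits. The client's automatic retries are only seconds apart and cannot outlast the cool-down, so requests will keep failing until it expires.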
 ## Automatic Retry Behaviour
 The client retries automatically on `429`, `500`, `502`, `503`, `504`, and network errors using exponential backoff:
 | Attempt | Delay before attempt |
 |---------|----------------------|
 | 1st (initial) | — |
 | 2nd | 1 second |
 | 3rd | 2 seconds |
 The default is **2 retries** (3 total attempts). Adjust per client:
 ```javascript
 // More retries for high-throughput workloads
 const client = new SecureChatCompletion({
  apiKey: process.env.NOMYO_API_KEY,
  maxRetries: 5,
 });
 // Disable retries entirely (fail fast)
 const client2 = new SecureChatCompletion({
  apiKey: process.env.NOMYO_API_KEY,
  maxRetries: 0,
 });
 ```
 ## Best Practices
 - **Throttle requests client-side** to stay at or below 2 requests/second under normal load.
 - **Use burst sparingly** — it is intended for occasional spikes, not sustained high-throughput usage.
 - **Increase `maxRetries`** for background jobs that can tolerate extra latency.
 - **Monitor for `503` responses** — repeated occurrences indicate your usage pattern is triggering the abuse threshold.
 - **Parallel requests** (e.g. `Promise.all`) count against the same rate limit — be careful with large batches.
 ## Batch Processing Example
 Throttle parallel requests to stay within the rate limit:
 ```javascript
 import { SecureChatCompletion } from 'nomyo-js';
 const client = new SecureChatCompletion({ apiKey: process.env.NOMYO_API_KEY });
 async function throttledBatch(queries, requestsPerSecond = 2) {
  const results = [];
  const delayMs = 1000 / requestsPerSecond;
  for (const query of queries) {
    const start = Date.now();
    const response = await client.create({
      model: 'Qwen/Qwen3-0.6B',
      messages: [{ role: 'user', content: query }],
    });
    results.push(response.choices[0].message.content);
    // Throttle: wait for the remainder of the time slot
    const elapsed = Date.now() - start;
    if (elapsed < delayMs) {
      await new Promise(resolve => setTimeout(resolve, delayMs - elapsed));
    }
  }
  client.dispose();
  return results;
 }
 ```
--- a/doc/security-guide.md
+++ b/doc/security-guide.md
@ -0,0 +1,237 @@
 # Security Guide
 ## Overview
 NOMYO.js provides end-to-end encryption for all communication between your application and NOMYO inference endpoints. Your prompts and responses are encrypted before leaving your process — the inference server never processes plaintext.
 For the full cryptographic architecture and threat model see [SECURITY.md](../docs/SECURITY.md).
 ---
 ## Encryption Mechanism
 ### Hybrid Encryption
 Each request uses a two-layer scheme:
 1. **AES-256-GCM** encrypts the payload (authenticated encryption — prevents tampering).
 2. **RSA-OAEP-SHA256** wraps the AES key for secure key exchange.
 The server holds the RSA private key; your client generates the AES key fresh for every request.
 ### Per-Request Ephemeral AES Keys
 - A new 256-bit AES key is generated for every `create()` call using the Web Crypto API.
 - The key is never reused, so compromising the AES key of one request does not expose any other request.
 - The key is zeroed from memory immediately after encryption.
 ### Key Exchange
 Your client's RSA public key is sent in the `X-Public-Key` request header. The server encrypts the response with it so only your client can decrypt the reply.
 ---
 ## Memory Protection
 ### What the Library Does
 All intermediate sensitive buffers (AES key, plaintext payload, decrypted response bytes) are wrapped in `SecureByteContext`. This guarantees they are zeroed in a `finally` block immediately after use, even if an exception occurs.
 The encrypted request body (`ArrayBuffer`) is also zeroed by the Node.js HTTP client after the data is handed to the socket.
 ### Limitations (Pure JavaScript)
 JavaScript has no direct access to OS memory management. The library cannot:
 - Lock pages to prevent swapping (`mlock` / `VirtualLock`)
 - Prevent the garbage collector from copying data internally
 - Guarantee memory won't appear in core dumps
 **Impact:** On a system under memory pressure, sensitive data could briefly be written to swap. For environments where this is unacceptable (PHI, classified), install the optional native addon or run on a system with swap disabled.
 ### Native Addon (Optional)
 The `nomyo-native` addon adds true `mlock` support. When installed, `getMemoryProtectionInfo()` reports `method: 'mlock'` and `canLock: true`:
 ```javascript
 import { getMemoryProtectionInfo } from 'nomyo-js';
 const info = getMemoryProtectionInfo();
 // Without addon: { method: 'zero-only', canLock: false }
 // With addon:    { method: 'mlock',     canLock: true  }
 ```
 ---
 ## Minimise Response Lifetime
 The library protects all intermediate crypto material in secure memory. However, the **final parsed response object** is returned to your code, and you are responsible for how long it lives.
 ```javascript
 // GOOD — extract what you need, then drop the response immediately
 const response = await client.create({
  model: 'Qwen/Qwen3.5-9B',
  messages: [{ role: 'user', content: 'Summarise patient record #1234' }],
  security_tier: 'maximum',
 });
 const reply = response.choices[0].message.content;
 // Let response go out of scope here — don't hold it in a variable
 // longer than necessary
 // BAD — holding the full response object in a long-lived scope
 this.lastResponse = response;  // stored for minutes / hours
 ```
 JavaScript's `delete` and variable reassignment do not zero the underlying memory. For sensitive data (PHI, classified), process and discard as quickly as possible — do not store in class attributes, global caches, or log files.
 ---
 ## Key Management
 ### Default Behaviour
 Keys are automatically generated on first use and saved to `client_keys/` (Node.js). On subsequent runs the saved keys are reloaded automatically.
 ```
 client_keys/
  private_key.pem    # permissions 0600 (owner-only)
  public_key.pem     # permissions 0644
 ```
 ### Configure the Key Directory
 ```javascript
 const client = new SecureChatCompletion({
  apiKey: process.env.NOMYO_API_KEY,
  keyDir: '/etc/myapp/nomyo-keys',  // custom path, outside project directory
 });
 ```
 ### Password-Protected Keys (Recommended for Production)
 Protect key files with a password so that a leaked key file cannot be used without it:
 ```javascript
 import { SecureCompletionClient } from 'nomyo-js';
 const client = new SecureCompletionClient({ routerUrl: 'https://api.nomyo.ai' });
 await client.generateKeys({
  saveToFile: true,
  keyDir: 'client_keys',
  password: process.env.NOMYO_KEY_PASSWORD,  // minimum 8 characters
 });
 ```
 To load password-protected keys manually:
 ```javascript
 await client.loadKeys(
  'client_keys/private_key.pem',
  'client_keys/public_key.pem',
  process.env.NOMYO_KEY_PASSWORD
 );
 ```
 ### Key Rotation
 Keys rotate automatically every 24 hours by default. Configure or disable:
 ```javascript
 const client = new SecureChatCompletion({
  apiKey: process.env.NOMYO_API_KEY,
  keyRotationInterval: 3600000,              // rotate every hour
  keyRotationDir: '/var/lib/myapp/keys',
  keyRotationPassword: process.env.KEY_PWD,
 });
 // Or disable entirely for short-lived processes
 const client2 = new SecureChatCompletion({
  apiKey: process.env.NOMYO_API_KEY,
  keyRotationInterval: 0,
 });
 ```
 ### File Permissions
 Private key files are saved with `0600` permissions (owner read/write only) on Unix-like systems. Add `client_keys/` and `*.pem` to your `.gitignore` — both are already included if you use this package's default `.gitignore`.
 ---
 ## Security Tiers
 | Tier | Hardware | Use case |
 |------|----------|----------|
 | `"standard"` | GPU | General secure inference |
 | `"high"` | CPU/GPU balanced | Sensitive business data, enforces secure tokenizer |
 | `"maximum"` | CPU only | HIPAA PHI, classified data — maximum isolation |
 Higher tiers add round-trip latency but increase hardware-level isolation.
 ---
 ## HTTPS Enforcement
 The client enforces HTTPS by default. HTTP connections require explicit opt-in and print a visible warning:
 ```javascript
 // Production — HTTPS only (default)
 const client = new SecureChatCompletion({ baseUrl: 'https://api.nomyo.ai' });
 // Local development — HTTP allowed with explicit flag
 const devClient = new SecureChatCompletion({
  baseUrl: 'http://localhost:12435',
  allowHttp: true,   // prints: "WARNING: Using HTTP instead of HTTPS..."
 });
 ```
 Without `allowHttp: true`, connecting over HTTP throws `SecurityError`.
 The server's public key is fetched over HTTPS with TLS certificate verification to prevent man-in-the-middle attacks.
 ---
 ## API Key Security
 API keys are sent as `Bearer` tokens in the `Authorization` header. The client validates that the key does not contain CR or LF characters to prevent HTTP header injection.
 Never hardcode API keys in source code — use environment variables:
 ```javascript
 const client = new SecureChatCompletion({
  apiKey: process.env.NOMYO_API_KEY,
 });
 ```
 ---
 ## Production Checklist
 - [ ] Always use HTTPS (`allowHttp` is `false` by default)
 - [ ] Load API key from environment variable, not hardcoded
 - [ ] Enable `secureMemory: true` (default)
 - [ ] Use password-protected key files (`keyRotationPassword`)
 - [ ] Store keys outside the project directory and outside version control
 - [ ] Add `client_keys/` and `*.pem` to `.gitignore`
 - [ ] Call `client.dispose()` when the client is no longer needed
 - [ ] Consider the native addon if swap-file exposure is unacceptable
 ---
 ## Compliance Considerations
 ### HIPAA
 For Protected Health Information (PHI):
 - Use `security_tier: 'maximum'` on requests containing PHI
 - Enable password-protected key files
 - Ensure HTTPS is enforced (the default)
 - Minimise response lifetime in memory (extract, use, discard)
 ### Data Classification
 | Classification | Recommended tier |
 |---------------|-----------------|
 | Public / internal | `"standard"` |
 | Confidential business data | `"high"` |
 | PHI, PII, classified | `"maximum"` |
--- a/doc/troubleshooting.md
+++ b/doc/troubleshooting.md
@ -0,0 +1,314 @@
 # Troubleshooting
 ## Authentication Errors
 ### `AuthenticationError: Invalid or missing API key`
 The server rejected your API key.
 **Causes and fixes:**
 - Key not set — pass `apiKey` to the constructor or use `process.env.NOMYO_API_KEY`.
 - Key has leading/trailing whitespace — check the value with `console.log(JSON.stringify(process.env.NOMYO_API_KEY))`.
 - Key contains CR or LF characters — the client rejects keys with `\r` or `\n` and throws `SecurityError` before the request is sent. Regenerate the key.
 ```javascript
 const client = new SecureChatCompletion({
  apiKey: process.env.NOMYO_API_KEY,  // never hardcode
 });
 ```
 ---
 ## Connection Errors
 ### `APIConnectionError: Network error` / `connect ECONNREFUSED`
 The client could not reach the router.
 **Check:**
 1. `baseUrl` is correct — the default is `https://api.nomyo.ai` (standard HTTPS port **443**; no explicit port is needed).
 2. You have network access to the host.
 3. TLS is not being blocked by a proxy or firewall.
 ### `SecurityError: HTTPS is required`
 You passed an `http://` URL without setting `allowHttp: true`.
 ```javascript
 // Local dev only
 const client = new SecureChatCompletion({
  baseUrl: 'http://localhost:12435',
  allowHttp: true,
 });
 ```
 Never set `allowHttp: true` in production — without TLS the server public key is fetched over an unauthenticated channel, so a man-in-the-middle could substitute its own key and read your requests.
 ### `APIConnectionError: Request timed out`
 The default timeout is 60 seconds. Larger models or busy endpoints may need more:
 ```javascript
 const client = new SecureChatCompletion({
  apiKey: process.env.NOMYO_API_KEY,
  timeout: 120000,  // 2 minutes
 });
 ```
 ---
 ## Key Loading Failures
 ### `Error: Failed to load keys: no such file or directory`
 The `keyDir` directory or the PEM files inside it don't exist. On first run the library generates and saves a new key pair automatically. If you specified a custom `keyDir`, make sure the directory is writable:
 ```javascript
 const client = new SecureChatCompletion({
  apiKey: process.env.NOMYO_API_KEY,
  keyDir: '/var/lib/myapp/nomyo-keys',  // directory must exist and be writable
 });
 ```
 ### `Error: Invalid passphrase` / `Error: Failed to decrypt private key`
 The password you passed to `loadKeys()` or `keyRotationPassword` doesn't match what was used to encrypt the file.
 ```javascript
 await client.loadKeys(
  'client_keys/private_key.pem',
  'client_keys/public_key.pem',
  process.env.NOMYO_KEY_PASSWORD,  // must match the password used on generateKeys()
 );
 ```
 ### `Error: RSA key too small`
 The library enforces a minimum key size of 2048 bits. If you have old 1024-bit keys, regenerate them:
 ```javascript
 await client.generateKeys({
  saveToFile: true,
  keyDir: 'client_keys',
  keySize: 4096,  // recommended
 });
 ```
 ### `Error: Failed to load keys` (browser)
 Key loading from files is a Node.js-only feature. In browsers, keys are generated in memory on first use. Do not call `loadKeys()` in a browser context.
 ---
 ## Rate Limit Errors
 ### `RateLimitError: Rate limit exceeded`
 All automatic retries were exhausted. The default limit is 2 requests/second; burst allows 4 requests/second once per 10-second window.
 **Fixes:**
 - Reduce concurrency — avoid large `Promise.all` batches.
 - Add client-side throttling (see [Rate Limits](rate-limits.md)).
 - Increase `maxRetries` so the client backs off longer before giving up:
 ```javascript
 const client = new SecureChatCompletion({
  apiKey: process.env.NOMYO_API_KEY,
  maxRetries: 5,
 });
 ```
 ### `ServiceUnavailableError` with 30-minute cool-down
 Burst limits were hit repeatedly and a cool-down was applied to your key. Wait 30 minutes, then review your request patterns.
 ---
 ## Model / Tier Errors
 ### `ForbiddenError: Model not allowed for this security tier`
 The model you requested is not available at the security tier you specified. Try a lower tier or a different model:
 ```javascript
 // If 'maximum' tier rejects the model, try 'high' or 'standard'
 const response = await client.create({
  model: 'Qwen/Qwen3.5-27B',
  messages: [...],
  security_tier: 'high',  // try 'standard' if still rejected
 });
 ```
 See [Models — Security Tier Compatibility](models.md#security-tier-compatibility) for details.
 ---
 ## Crypto / Security Errors
 ### `SecurityError: Decryption failed`
 The response could not be decrypted. This is intentionally vague to avoid leaking crypto details.
 **Possible causes:**
 - The server returned a malformed response (check `debug: true` output).
 - A network proxy modified the response body.
 - The server's public key changed mid-session — the next request will re-fetch it automatically.
 Enable debug mode to log the raw response and narrow the cause:
 ```javascript
 const client = new SecureChatCompletion({
  apiKey: process.env.NOMYO_API_KEY,
  debug: true,
 });
 ```
 ### `Error: Unsupported protocol version` / `Error: Unsupported encryption algorithm`
 The server sent a response in a protocol version or with an encryption algorithm not supported by this client version. Update the package:
 ```bash
 npm update nomyo-js
 ```
 ---
 ## `DisposedError`: Method called after `dispose()`
 You called a method on a client that has already been disposed.
 ```javascript
 client.dispose();
 await client.create(...);  // throws DisposedError
 ```
 Create a new client instance if you need to make more requests after disposal.
 ---
 ## Memory Protection Warnings
 ### `getMemoryProtectionInfo()` returns `method: 'zero-only'`
 This is normal for a pure JavaScript installation. The library zeroes sensitive buffers immediately after use but cannot lock pages to prevent swapping (OS `mlock` requires a native addon).
 ```javascript
 import { getMemoryProtectionInfo } from 'nomyo-js';
 const info = getMemoryProtectionInfo();
 // { method: 'zero-only', canLock: false, isPlatformSecure: false }
 ```
 For environments where swap-file exposure is unacceptable (HIPAA PHI, classified data), install the optional `nomyo-native` addon or run on a system with swap disabled.
 ---
 ## Node.js-Specific Issues
 ### `ReferenceError: crypto is not defined`
 In CommonJS modules on Node.js before v19, `crypto` is not a global. Import it explicitly:
 ```javascript
 // CommonJS
 const { webcrypto } = require('crypto');
 global.crypto = webcrypto;
 // Or switch to ES modules (recommended)
 // package.json: "type": "module"
 ```
 The library itself imports `crypto` correctly — this error only appears if your own application code tries to use `crypto` directly.
 ### `SyntaxError: Cannot use import statement outside a module` / CommonJS vs ESM
 The package ships both CommonJS (`dist/node/`) and ESM (`dist/esm/`) builds. Node.js selects the correct one automatically via `package.json` `exports`. If you see import errors, check that your `package.json` or bundler is not forcing the wrong format.
 For ESM: set `"type": "module"` in your `package.json` or use `.mjs` file extensions.
 For CommonJS: use `require('nomyo-js')` or `.cjs` extensions.
 ### TypeScript: `Cannot find module 'nomyo-js'` / missing types
 Ensure your `tsconfig.json` includes `"moduleResolution": "bundler"` or `"moduleResolution": "node16"` and that `nomyo-js` is in `dependencies` (not just `devDependencies`):
 ```bash
 npm install nomyo-js
 ```
 ---
 ## Browser-Specific Issues
 ### `Content Security Policy blocked`
 If your app's CSP restricts `connect-src`, add the NOMYO API domain as shown below. If you load the library from a CDN such as `unpkg.com`, also allow that origin in `script-src`:
 ```
 Content-Security-Policy: connect-src https://api.nomyo.ai;
 ```
 ### `TypeError: Failed to fetch` (CORS)
 The NOMYO API includes CORS headers. If you see CORS errors in a browser, verify the `baseUrl` is correct (HTTPS, correct port) and that no browser extension is blocking the request.
 ### Keys not persisted across page reloads
 This is expected behaviour — browsers do not have file system access. Keys are generated fresh on each page load. If you need persistent keys in a browser context, implement your own `loadKeys`/`generateKeys` wrapper using `localStorage` or `IndexedDB` (not recommended for high-security scenarios).
 ---
 ## Debugging Tips
 ### Enable verbose logging
 ```javascript
 const client = new SecureChatCompletion({
  apiKey: process.env.NOMYO_API_KEY,
  debug: true,
 });
 ```
 Debug mode logs: key generation/loading, server public key fetches, request encryption details, retry attempts, and response decryption.
 ### Check memory protection status
 ```javascript
 import { getMemoryProtectionInfo } from 'nomyo-js';
 console.log(getMemoryProtectionInfo());
 ```
 ### Inspect response metadata
 The `_metadata` field in every response carries server-side diagnostics:
 ```javascript
 const response = await client.create({ ... });
 console.log(response._metadata);
 // {
 //   payload_id: '...',
 //   is_encrypted: true,
 //   encryption_algorithm: 'hybrid-aes256-rsa4096',
 //   security_tier: 'standard',
 //   memory_protection: { ... },
 // }
 ```
 ### Test with minimum configuration
 Strip all optional configuration and test with the simplest possible call to isolate the issue:
 ```javascript
 import { SecureChatCompletion } from 'nomyo-js';
 const client = new SecureChatCompletion({ apiKey: process.env.NOMYO_API_KEY });
 const r = await client.create({
  model: 'Qwen/Qwen3-0.6B',
  messages: [{ role: 'user', content: 'ping' }],
 });
 console.log(r.choices[0].message.content);
 client.dispose();
 ```
--- a/docs/SECURITY.md
+++ b/docs/SECURITY.md
@ -143,7 +143,7 @@ const client = new SecureChatCompletion({ baseUrl: 'https://...' });
 # .env file (never commit to git)
 NOMYO_API_KEY=your-api-key
 NOMYO_KEY_PASSWORD=your-key-password
-NOMYO_SERVER_URL=https://api.nomyo.ai:12434
+NOMYO_SERVER_URL=https://api.nomyo.ai
 ```
 ---
--- a/examples/node/basic.js
+++ b/examples/node/basic.js
@ -7,7 +7,7 @@ import { SecureChatCompletion } from 'nomyo-js';
 async function main() {
    // Initialize client
    const client = new SecureChatCompletion({
-        baseUrl: 'https://api.nomyo.ai:12434',
+        baseUrl: 'https://api.nomyo.ai',
        // For local development, use:
        // baseUrl: 'http://localhost:12434',
        // allowHttp: true
--- a/examples/node/with-tools.js
+++ b/examples/node/with-tools.js
@ -6,7 +6,7 @@ import { SecureChatCompletion } from 'nomyo-js';
 async function main() {
    const client = new SecureChatCompletion({
-        baseUrl: 'https://api.nomyo.ai:12434'
+        baseUrl: 'https://api.nomyo.ai'
    });
    try {
--- a/src/api/SecureChatCompletion.ts
+++ b/src/api/SecureChatCompletion.ts
@ -15,7 +15,7 @@ export class SecureChatCompletion {
    constructor(config: ChatCompletionConfig = {}) {
        const {
-            baseUrl = 'https://api.nomyo.ai:12435',
+            baseUrl = 'https://api.nomyo.ai',
            allowHttp = false,
            apiKey,
            secureMemory = true,
--- a/src/core/SecureCompletionClient.ts
+++ b/src/core/SecureCompletionClient.ts
@ -79,9 +79,9 @@ export class SecureCompletionClient {
    // Promise-based mutex: serialises concurrent ensureKeys() calls
    private ensureKeysLock: Promise<void> = Promise.resolve();
-    constructor(config: ClientConfig = { routerUrl: 'https://api.nomyo.ai:12435' }) {
+    constructor(config: ClientConfig = { routerUrl: 'https://api.nomyo.ai' }) {
        const {
-            routerUrl = 'https://api.nomyo.ai:12435',
+            routerUrl = 'https://api.nomyo.ai',
            allowHttp = false,
            secureMemory = true,
            keySize = 4096,
--- a/src/types/client.ts
+++ b/src/types/client.ts
@ -3,7 +3,7 @@
 */
 export interface ClientConfig {
-    /** Base URL of the NOMYO router (e.g., https://api.nomyo.ai:12434) */
+    /** Base URL of the NOMYO router (e.g., https://api.nomyo.ai) */
    routerUrl: string;
    /** Allow HTTP connections (ONLY for local development, never in production) */