fix: base_url

doc: created
2026-04-16 16:44:26 +02:00 · 2026-04-16 16:44:26 +02:00 · 43165f86f2
commit 43165f86f2
parent 6e02559f4e
17 changed files with 2151 additions and 293 deletions
--- a/CONTRIBUTING.md
+++ b/CONTRIBUTING.md
@ -0,0 +1,11 @@
+# Contributing
+
+## Development Setup
+
+```bash
+npm install
+npm run build
+npm test
+```
+
+Node.js 18 LTS or later is required for tests and the TypeScript compiler.
--- a/README.md
+++ b/README.md
@ -1,13 +1,13 @@
-# NOMYO.js - Secure JavaScript Chat Client
+# NOMYO.js — Secure JavaScript Chat Client

 **OpenAI-compatible secure chat client with end-to-end encryption for NOMYO Inference Endpoints**

-🔒 **All prompts and responses are automatically encrypted and decrypted**  
-🔑 **Uses hybrid encryption (AES-256-GCM + RSA-OAEP with 4096-bit keys)**  
-🔄 **Drop-in replacement for OpenAI's ChatCompletion API**  
-🌐 **Works in both Node.js and browsers**
+- All prompts and responses are automatically encrypted and decrypted
+- Hybrid encryption: AES-256-GCM payload + RSA-OAEP-SHA256 key exchange, 4096-bit keys
+- Drop-in replacement for OpenAI's ChatCompletion API
+- Works in both Node.js and browsers

-## 🚀 Quick Start
+## Quick Start

 ### Installation

@ -20,371 +20,323 @@ npm install nomyo-js
 ```javascript
 import { SecureChatCompletion } from 'nomyo-js';

-// Initialize client (defaults to https://api.nomyo.ai:12434)
 const client = new SecureChatCompletion({
-  baseUrl: 'https://api.nomyo.ai:12434'
+  apiKey: process.env.NOMYO_API_KEY,
 });

-// Simple chat completion
 const response = await client.create({
  model: 'Qwen/Qwen3-0.6B',
-  messages: [
-    { role: 'user', content: 'Hello! How are you today?' }
-  ],
-  temperature: 0.7
+  messages: [{ role: 'user', content: 'Hello!' }],
+  temperature: 0.7,
 });

 console.log(response.choices[0].message.content);
+client.dispose();
 ```

 ### Basic Usage (Browser)

 ```html
-<!DOCTYPE html>
-<html>
-<head>
-  <script type="module">
+<script type="module">
  import { SecureChatCompletion } from 'https://unpkg.com/nomyo-js/dist/browser/index.js';

  const client = new SecureChatCompletion({
-      baseUrl: 'https://api.nomyo.ai:12434'
+    baseUrl: 'https://api.nomyo.ai',
+    apiKey:  'your-api-key',
  });

  const response = await client.create({
    model: 'Qwen/Qwen3-0.6B',
-      messages: [
-        { role: 'user', content: 'What is 2+2?' }
-      ]
+    messages: [{ role: 'user', content: 'What is 2+2?' }],
  });

  console.log(response.choices[0].message.content);
-  </script>
-</head>
-<body>
-  <h1>NOMYO Secure Chat</h1>
-</body>
-</html>
+</script>
 ```

-## 🔐 Security Features
+## Documentation
+
+Full documentation is in the [`doc/`](doc/) directory:
+
+- [Getting Started](doc/getting-started.md) — walkthrough for new users
+- [API Reference](doc/api-reference.md) — complete constructor options, methods, types, and error classes
+- [Models](doc/models.md) — available models and selection guide
+- [Security Guide](doc/security-guide.md) — encryption, memory protection, key management, compliance
+- [Rate Limits](doc/rate-limits.md) — limits, automatic retry behaviour, batch throttling
+- [Examples](doc/examples.md) — 12+ code examples for common scenarios
+- [Troubleshooting](doc/troubleshooting.md) — error reference and debugging tips
+
+## Security Features

 ### Hybrid Encryption

-**Payload encryption**: AES-256-GCM (authenticated encryption)
- **Key exchange**: RSA-OAEP with SHA-256
+- **Payload encryption**: AES-256-GCM (authenticated encryption)
+- **Key exchange**: RSA-OAEP-SHA256
 - **Key size**: 4096-bit RSA keys
- **All communication**: End-to-end encrypted
+- **Scope**: All communication is end-to-end encrypted

 ### Key Management

- **Automatic key generation**: Keys are automatically generated on first use
- **Automatic key loading**: Existing keys are loaded automatically from `client_keys/` directory (Node.js only)
- **No manual intervention required**: The library handles key management automatically
- **Optional persistence**: Keys can be saved to `client_keys/` directory for reuse across sessions (Node.js only)
- **Password protection**: Optional password encryption for private keys (minimum 8 characters required)
- **Secure permissions**: Private keys stored with restricted permissions (600 - owner-only access)
- **Automatic key rotation**: Keys are rotated on a configurable interval (default: 24 hours) to limit fingerprint lifetime
- **Explicit lifecycle management**: Call `dispose()` to immediately zero in-memory key material and stop the rotation timer
+- **Automatic**: Keys are generated on first use. On Node.js they are saved to `keyDir` (default: `client_keys/`) and reloaded on subsequent runs; browsers keep keys in memory only.
+- **Password protection**: Private key files can optionally be AES-encrypted with a password (minimum 8 characters).
+- **Secure permissions**: Private key files saved at `0600` (owner-only).
+- **Auto-rotation**: Keys rotate every 24 hours by default (configurable via `keyRotationInterval`).
+- **Explicit lifecycle**: Call `dispose()` to zero in-memory key material and stop the rotation timer.

-### Secure Memory Protection
+### Secure Memory

-> [!NOTE]
-> **Pure JavaScript Implementation**: This version uses pure JavaScript with immediate memory zeroing.
-> OS-level memory locking (`mlock`) is NOT available without a native addon.
-> For enhanced security in production, consider implementing the optional native addon (see `native/` directory).
+The library wraps all intermediate sensitive buffers (AES keys, plaintext payload, decrypted bytes) in `SecureByteContext`, which zeroes them in a `finally` block immediately after use.

- **Automatic cleanup**: Sensitive data is zeroed from memory immediately after use
- **Best-effort protection**: Minimizes exposure time of sensitive data
- **Fallback mechanism**: Graceful degradation if enhanced security is unavailable
-
-## 🔄 OpenAI Compatibility
-
-The `SecureChatCompletion` class provides **exact API compatibility** with OpenAI's `ChatCompletion.create()` method.
-
-### Supported Parameters
-
-All standard OpenAI parameters are supported:
-
- `model`: Model identifier
- `messages`: List of message objects
- `temperature`: Sampling temperature (0-2)
- `max_tokens`: Maximum tokens to generate
- `top_p`: Nucleus sampling
- `frequency_penalty`: Frequency penalty
- `presence_penalty`: Presence penalty
- `stop`: Stop sequences
- `n`: Number of completions
- `tools`: Tool definitions
- `tool_choice`: Tool selection strategy
- `user`: User identifier
-
-### Response Format
-
-Responses follow the OpenAI format exactly, with an additional `_metadata` field for debugging and security information:
+Pure JavaScript cannot lock memory pages (`mlock`) to prevent the OS from swapping them to disk. For environments where swap-file exposure is unacceptable, install the optional `nomyo-native` addon. Check the current protection level:

 ```javascript
-{
-  "id": "chatcmpl-123",
-  "object": "chat.completion",
-  "created": 1234567890,
-  "model": "Qwen/Qwen3-0.6B",
-  "choices": [
-    {
-      "index": 0,
-      "message": {
-        "role": "assistant",
-        "content": "Hello! I'm doing well, thank you for asking."
-      },
-      "finish_reason": "stop"
-    }
-  ],
-  "usage": {
-    "prompt_tokens": 10,
-    "completion_tokens": 20,
-    "total_tokens": 30
-  },
-  "_metadata": {
-    "payload_id": "openai-compat-abc123",
-    "processed_at": 1765250382,
-    "is_encrypted": true,
-    "encryption_algorithm": "hybrid-aes256-rsa4096",
-    "response_status": "success"
-  }
+import { getMemoryProtectionInfo } from 'nomyo-js';
+
+const info = getMemoryProtectionInfo();
+// Without addon: { method: 'zero-only', canLock: false }
+// With addon:    { method: 'mlock',     canLock: true  }
+```
+
+### Security Tiers
+
+Pass `security_tier` per request to route inference to increasingly isolated hardware:
+
+| Tier | Hardware | Use case |
+|------|----------|----------|
+| `"standard"` | GPU | General secure inference |
+| `"high"` | CPU/GPU balanced | Sensitive business data |
+| `"maximum"` | CPU only | HIPAA PHI, classified data |
+
+```javascript
+const response = await client.create({
+  model:         'Qwen/Qwen3-0.6B',
+  messages:      [{ role: 'user', content: 'Patient record summary...' }],
+  security_tier: 'maximum',
+});
+```
+
+## Usage Examples
+
+### With API Key
+
+```javascript
+const client = new SecureChatCompletion({ apiKey: process.env.NOMYO_API_KEY });
+```
+
+### Error Handling
+
+```javascript
+import {
+  SecureChatCompletion,
+  AuthenticationError,
+  RateLimitError,
+  ForbiddenError,
+} from 'nomyo-js';
+
+try {
+  const response = await client.create({ model: 'Qwen/Qwen3-0.6B', messages: [...] });
+} catch (err) {
+  if (err instanceof AuthenticationError) console.error('Check API key:', err.message);
+  else if (err instanceof RateLimitError)    console.error('Rate limit hit:', err.message);
+  else if (err instanceof ForbiddenError)    console.error('Model/tier mismatch:', err.message);
+  else throw err;
 }
 ```

-## 🛠️ Usage Examples
+### Per-Request Router Override

-### Basic Chat
+Send a single request to a different router without changing the main client:

 ```javascript
-import { SecureChatCompletion } from 'nomyo-js';
-
-const client = new SecureChatCompletion({
-  baseUrl: 'https://api.nomyo.ai:12434'
-});
-
 const response = await client.create({
  model:    'Qwen/Qwen3-0.6B',
-  messages: [
-    { role: 'system', content: 'You are a helpful assistant.' },
-    { role: 'user', content: 'What is the capital of France?' }
-  ],
-  temperature: 0.7
+  messages: [{ role: 'user', content: 'Hello from secondary router' }],
+  base_url: 'https://secondary.nomyo.ai:12435',  // temporary — main client unchanged
 });
-
-console.log(response.choices[0].message.content);
 ```

-### With Tools
+### Tool / Function Calling

 ```javascript
 const response = await client.create({
  model:    'Qwen/Qwen3-0.6B',
-  messages: [
-    { role: 'user', content: "What's the weather in Paris?" }
-  ],
+  messages: [{ role: 'user', content: "What's the weather in Paris?" }],
  tools: [
    {
      type: 'function',
      function: {
        name:        'get_weather',
-        description: 'Get weather information',
+        description: 'Get weather information for a location',
        parameters: {
          type: 'object',
-          properties: {
-            location: { type: 'string' }
+          properties: { location: { type: 'string' } },
+          required: ['location'],
        },
-          required: ['location']
-        }
-      }
-    }
-  ]
+      },
+    },
+  ],
+  tool_choice: 'auto',
 });
 ```

-### With API Key Authentication
+### Thinking Models

 ```javascript
-const client = new SecureChatCompletion({
-  baseUrl: 'https://api.nomyo.ai:12434',
-  apiKey: 'your-api-key-here'
-});
-
-// API key will be automatically included in all requests
 const response = await client.create({
-  model: 'Qwen/Qwen3-0.6B',
-  messages: [
-    { role: 'user', content: 'Hello!' }
-  ]
-});
-```
-
-### Custom Key Management (Node.js)
-
-```javascript
-import { SecureCompletionClient } from 'nomyo-js';
-
-const client = new SecureCompletionClient({
-  routerUrl: 'https://api.nomyo.ai:12434'
+  model:    'LiquidAI/LFM2.5-1.2B-Thinking',
+  messages: [{ role: 'user', content: 'Is 9.9 larger than 9.11?' }],
 });

-// Generate keys with password protection
-await client.generateKeys({
-  saveToFile: true,
-  keyDir: 'client_keys',
-  password: 'your-secure-password'
-});
-
-// Or load existing keys
-await client.loadKeys(
-  'client_keys/private_key.pem',
-  'client_keys/public_key.pem',
-  'your-secure-password'
-);
+const { content, reasoning_content } = response.choices[0].message;
+console.log('Reasoning:', reasoning_content);
+console.log('Answer:',    content);
 ```

 ### Resource Management

-Always call `dispose()` when finished to zero key material and stop the background rotation timer:
-
 ```javascript
-const client = new SecureChatCompletion({
-  baseUrl: 'https://api.nomyo.ai:12434',
-  keyRotationInterval: 3600000,  // rotate every hour
-});
+const client = new SecureChatCompletion({ apiKey: process.env.NOMYO_API_KEY });

 try {
  const response = await client.create({ model: 'Qwen/Qwen3-0.6B', messages: [...] });
  console.log(response.choices[0].message.content);
 } finally {
-  client.dispose();
+  client.dispose();  // zeros key material, stops rotation timer
 }
 ```

-To disable key rotation entirely (e.g. short-lived scripts):
-
-```javascript
-const client = new SecureChatCompletion({
-  baseUrl: 'https://api.nomyo.ai:12434',
-  keyRotationInterval: 0,  // disabled
-});
-```
-
-## 🧪 Platform Support
-
-### Node.js
-
- **Minimum version**: Node.js 14.17+
- **Recommended**: Node.js 18 LTS or later
- **Key storage**: File system (`client_keys/` directory)
- **Security**: Full implementation with automatic key persistence
-
-### Browsers
-
- **Supported browsers**: Modern browsers with Web Crypto API support
-  - Chrome 37+
-  - Firefox 34+
-  - Safari 11+
-  - Edge 79+
- **Key storage**: In-memory only (keys not persisted for security)
- **Security**: Best-effort memory protection (no OS-level locking)
-
-## 📚 API Reference
-
-### SecureChatCompletion
-
-#### Constructor
-
-```typescript
-new SecureChatCompletion(config?: {
-  baseUrl?: string;                // Default: 'https://api.nomyo.ai:12434'
-  allowHttp?: boolean;             // Default: false
-  apiKey?: string;                 // Default: undefined
-  secureMemory?: boolean;          // Default: true
-  timeout?: number;                // Request timeout ms. Default: 60000
-  debug?: boolean;                 // Enable verbose logging. Default: false
-  keyRotationInterval?: number;    // Key rotation ms. 0 = disabled. Default: 86400000 (24h)
-  keyRotationDir?: string;         // Directory for rotated keys. Default: 'client_keys'
-  keyRotationPassword?: string;    // Password for rotated key files
-})
-```
-
-#### Methods
-
- `create(request: ChatCompletionRequest): Promise<ChatCompletionResponse>`
- `acreate(request: ChatCompletionRequest): Promise<ChatCompletionResponse>` (alias)
- `dispose(): void` — zero key material and stop rotation timer
-
-### SecureCompletionClient
-
-Lower-level API for advanced use cases.
-
-#### Constructor
-
-```typescript
-new SecureCompletionClient(config?: {
-  routerUrl?: string;              // Default: 'https://api.nomyo.ai:12434'
-  allowHttp?: boolean;             // Default: false
-  secureMemory?: boolean;          // Default: true
-  keySize?: 2048 | 4096;          // Default: 4096
-  timeout?: number;                // Request timeout ms. Default: 60000
-  debug?: boolean;                 // Enable verbose logging. Default: false
-  keyRotationInterval?: number;    // Key rotation ms. 0 = disabled. Default: 86400000 (24h)
-  keyRotationDir?: string;         // Directory for rotated keys. Default: 'client_keys'
-  keyRotationPassword?: string;    // Password for rotated key files
-})
-```
-
-#### Methods
-
- `generateKeys(options?: KeyGenOptions): Promise<void>`
- `loadKeys(privateKeyPath: string, publicKeyPath?: string, password?: string): Promise<void>`
- `fetchServerPublicKey(): Promise<string>`
- `encryptPayload(payload: object): Promise<ArrayBuffer>`
- `decryptResponse(encrypted: ArrayBuffer, payloadId: string): Promise<object>`
- `sendSecureRequest(payload: object, payloadId: string, apiKey?: string): Promise<object>`
- `dispose(): void` — zero key material and stop rotation timer
-
-## 🔧 Configuration
-
 ### Local Development (HTTP)

 ```javascript
 const client = new SecureChatCompletion({
-  baseUrl: 'http://localhost:12434',
-  allowHttp: true  // Required for HTTP connections
+  baseUrl:   'http://localhost:12435',
+  allowHttp: true,  // required — also prints a visible warning
 });
 ```

-⚠️ **Warning**: Only use HTTP for local development. Never use in production!
+## API Reference

-### Disable Secure Memory
+### `SecureChatCompletion` — Constructor Options

-```javascript
-const client = new SecureChatCompletion({
-  baseUrl: 'https://api.nomyo.ai:12434',
-  secureMemory: false  // Disable memory protection (not recommended)
-});
+```typescript
+new SecureChatCompletion(config?: ChatCompletionConfig)
 ```

-## 📝 Security Best Practices
+| Option | Type | Default | Description |
+|--------|------|---------|-------------|
+| `baseUrl` | `string` | `'https://api.nomyo.ai'` | NOMYO router URL. Must be HTTPS in production. |
+| `allowHttp` | `boolean` | `false` | Allow HTTP connections. Local development only. |
+| `apiKey` | `string` | `undefined` | Bearer token for `Authorization` header. |
+| `secureMemory` | `boolean` | `true` | Zero sensitive buffers immediately after use. |
+| `timeout` | `number` | `60000` | Request timeout in milliseconds. |
+| `debug` | `boolean` | `false` | Print verbose logging to the console. |
+| `keyDir` | `string` | `'client_keys'` | Directory to load/save RSA keys on startup. |
+| `keyRotationInterval` | `number` | `86400000` | Auto-rotate keys every N ms. `0` disables rotation. |
+| `keyRotationDir` | `string` | `'client_keys'` | Directory for rotated key files. Node.js only. |
+| `keyRotationPassword` | `string` | `undefined` | Password for encrypted rotated key files. |
+| `maxRetries` | `number` | `2` | Extra retry attempts on 429/5xx/network errors. Exponential backoff (1 s, 2 s, …). |

- ✅ Always use HTTPS in production
- ✅ Use password protection for private keys (Node.js)
- ✅ Keep private keys secure (permissions set to 600)
- ✅ Never share your private key
- ✅ Verify server's public key fingerprint before first use
- ✅ Enable secure memory protection (default)
+#### Methods

-## 🤝 Contributing
+- `create(request): Promise<ChatCompletionResponse>` — send an encrypted chat completion
+- `acreate(request): Promise<ChatCompletionResponse>` — alias for `create()`
+- `dispose(): void` — zero key material and stop the rotation timer

-Contributions are welcome! Please open issues or pull requests on the project repository.
+#### `create()` Request Fields

-## 📄 License
+All standard OpenAI fields (`model`, `messages`, `temperature`, `top_p`, `max_tokens`, `stop`, `n`, `tools`, `tool_choice`, `user`, `frequency_penalty`, `presence_penalty`, `logit_bias`) plus:

-See LICENSE file for licensing information.
+| Field | Description |
+|-------|-------------|
+| `security_tier` | `"standard"` \| `"high"` \| `"maximum"` — hardware isolation level |
+| `api_key` | Per-request API key override |
+| `base_url` | Per-request router URL override — creates a temporary client, used once, then disposed |

-## 📞 Support
+### `SecureCompletionClient` — Constructor Options

-For questions or issues, please refer to the project documentation or open an issue.
+Lower-level client. All options above apply, with these differences:
+
+| Option | Type | Default | Description |
+|--------|------|---------|-------------|
+| `routerUrl` | `string` | `'https://api.nomyo.ai'` | Router base URL (takes the place of `baseUrl`) |
+| `keySize` | `2048 \| 4096` | `4096` | RSA modulus length |
+
+#### Methods
+
+- `generateKeys(options?)` — generate a new RSA key pair
+- `loadKeys(privateKeyPath, publicKeyPath?, password?)` — load existing PEM files
+- `fetchServerPublicKey()` — fetch the server's RSA public key
+- `encryptPayload(payload)` — encrypt a request payload
+- `decryptResponse(encrypted, payloadId)` — decrypt a response body
+- `sendSecureRequest(payload, payloadId, apiKey?, securityTier?)` — full encrypt → POST → decrypt cycle
+- `dispose()` — zero key material and stop rotation timer
+
+### Secure Memory Public API
+
+```typescript
+import {
+  getMemoryProtectionInfo,
+  disableSecureMemory,
+  enableSecureMemory,
+  SecureByteContext,
+} from 'nomyo-js';
+```
+
+| Export | Description |
+|--------|-------------|
+| `getMemoryProtectionInfo()` | Returns `{ method, canLock, isPlatformSecure, details? }` |
+| `disableSecureMemory()` | Disable global secure-memory zeroing |
+| `enableSecureMemory()` | Re-enable global secure-memory zeroing |
+| `SecureByteContext` | Low-level buffer wrapper — zeros in `finally` block |
+
+### Error Classes
+
+```typescript
+import {
+  AuthenticationError, InvalidRequestError, RateLimitError,
+  ForbiddenError, ServerError, ServiceUnavailableError,
+  APIConnectionError, SecurityError, DisposedError, APIError,
+} from 'nomyo-js';
+```
+
+| Class | HTTP | Thrown when |
+|-------|------|-------------|
+| `AuthenticationError` | 401 | Invalid or missing API key |
+| `InvalidRequestError` | 400 | Malformed request |
+| `ForbiddenError` | 403 | Model not allowed for the security tier |
+| `RateLimitError` | 429 | Rate limit exceeded (after all retries) |
+| `ServerError` | 500 | Internal server error (after all retries) |
+| `ServiceUnavailableError` | 503 | Backend unavailable (after all retries) |
+| `APIError` | varies | Other HTTP errors |
+| `APIConnectionError` | — | Network failure or timeout (after all retries) |
+| `SecurityError` | — | HTTPS not used, header injection, or crypto failure |
+| `DisposedError` | — | Method called after `dispose()` |
+
+## Platform Support
+
+### Node.js
+
+- **Minimum**: Node.js 14.17+
+- **Recommended**: Node.js 18 LTS or later
+- **Key storage**: File system (`keyDir` directory, default `client_keys/`)
+
+### Browsers
+
+- **Supported**: Chrome 37+, Firefox 34+, Safari 11+, Edge 79+
+- **Key storage**: In-memory only (not persisted)
+- **Limitation**: File-based key operations (`keyDir`, `loadKeys`) are not available
+
+## Security Best Practices
+
+- Always use HTTPS (`allowHttp` is `false` by default)
+- Load API key from an environment variable, never hardcode it
+- Use password-protected key files (`keyRotationPassword`)
+- Store keys outside the project directory and outside version control
+- Add `client_keys/` and `*.pem` to `.gitignore`
+- Call `dispose()` when the client is no longer needed
+- Use `security_tier: 'maximum'` for HIPAA PHI or classified data
+- Consider the `nomyo-native` addon if swap-file exposure is unacceptable
+
+## License
+
+See LICENSE file.
--- a/doc/README.md
+++ b/doc/README.md
@ -0,0 +1,49 @@
+# NOMYO.js Documentation
+
+Comprehensive documentation for the NOMYO secure JavaScript/TypeScript chat client — a drop-in replacement for OpenAI's `ChatCompletion` API with end-to-end encryption.
+
+To use this library you need an active subscription on [NOMYO Inference](https://chat.nomyo.ai/).
+
+## Quick Start
+
+```javascript
+import { SecureChatCompletion } from 'nomyo-js';
+
+const client = new SecureChatCompletion({ apiKey: process.env.NOMYO_API_KEY });
+
+const response = await client.create({
+  model: 'Qwen/Qwen3-0.6B',
+  messages: [{ role: 'user', content: 'Hello!' }],
+  security_tier: 'standard',
+});
+
+console.log(response.choices[0].message.content);
+```
+
+## Documentation
+
+1. [Installation](installation.md) — npm, CDN, and native addon setup
+2. [Getting Started](getting-started.md) — first request, auth, security tiers, error handling
+3. [API Reference](api-reference.md) — complete constructor options, methods, and types
+4. [Models](models.md) — available models and selection guidance
+5. [Security Guide](security-guide.md) — encryption architecture, best practices, and compliance
+6. [Rate Limits](rate-limits.md) — request limits, burst behaviour, and retry strategy
+7. [Examples](examples.md) — real-world scenarios, browser usage, and advanced patterns
+8. [Troubleshooting](troubleshooting.md) — common errors and their fixes
+
+---
+
+## Key Features
+
+- **End-to-end encryption** — AES-256-GCM + RSA-OAEP-4096. No plaintext ever leaves your process.
+- **OpenAI-compatible API** — `create()` / `acreate()` accept the same parameters as the OpenAI SDK.
+- **Browser + Node.js** — single package, separate entry points for each runtime.
+- **Automatic key management** — keys are generated on first use and optionally persisted to disk (Node.js).
+- **Automatic key rotation** — RSA keys rotate on a configurable interval (default 24 h) to limit fingerprint lifetime.
+- **Security tiers** — per-request routing to `standard`, `high`, or `maximum` isolation hardware.
+- **Retry with exponential backoff** — automatic retries on 429 / 5xx / network errors (configurable).
+- **Resource lifecycle** — `dispose()` immediately zeros in-memory key material and stops the rotation timer.
+
+## Technical Security Docs
+
+For cryptographic architecture, threat model, and implementation status see [SECURITY.md](../docs/SECURITY.md).
--- a/doc/api-reference.md
+++ b/doc/api-reference.md
@ -0,0 +1,272 @@
+# API Reference
+
+## `SecureChatCompletion`
+
+High-level OpenAI-compatible client. The recommended entry point for most use cases.
+
+### Constructor
+
+```typescript
+new SecureChatCompletion(config?: ChatCompletionConfig)
+```
+
+#### `ChatCompletionConfig`
+
+
+| Option                | Type      | Default                  | Description                                                                                                                                                               |
+| ----------------------- | ----------- | -------------------------- | --------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
+| `baseUrl`             | `string`  | `'https://api.nomyo.ai'` | NOMYO router URL. Must be HTTPS in production.                                                                                                                            |
+| `allowHttp`           | `boolean` | `false`                  | Allow HTTP connections. **Local development only.**                                                                                                                       |
+| `apiKey`              | `string`  | `undefined`              | Bearer token sent in `Authorization` header.                                                                                                                              |
+| `secureMemory`        | `boolean` | `true`                   | Enable immediate zeroing of sensitive buffers after use.                                                                                                                  |
+| `timeout`             | `number`  | `60000`                  | Request timeout in milliseconds.                                                                                                                                          |
+| `debug`               | `boolean` | `false`                  | Print verbose logging to the console.                                                                                                                                     |
+| `keyDir`              | `string`  | `'client_keys'`          | Directory to load/save RSA keys on startup. If the directory contains an existing key pair it is loaded; otherwise a new pair is generated and saved there. Node.js only. |
+| `keyRotationInterval` | `number`  | `86400000` (24 h)        | Auto-rotate RSA keys every N milliseconds. Set to `0` to disable.                                                         |
+| `keyRotationDir`      | `string`  | `'client_keys'`          | Directory where rotated key files are saved. Node.js only.                                                                                                                |
+| `keyRotationPassword` | `string`  | `undefined`              | Password used to encrypt rotated key files.                                                                                                                               |
+| `maxRetries`          | `number`  | `2`                      | Maximum extra attempts on retryable errors (429, 500, 502, 503, 504, network errors). Uses exponential backoff (1 s, 2 s, …). Set to `0` to disable retries.             |
+
+### Methods
+
+#### `create(request): Promise<ChatCompletionResponse>`
+
+Send an encrypted chat completion request. Returns the decrypted response.
+
+```typescript
+async create(request: ChatCompletionRequest): Promise<ChatCompletionResponse>
+```
+
+**`ChatCompletionRequest` fields:**
+
+
+| Field               | Type                     | Description                                                                                                                    |
+| --------------------- | -------------------------- | -------------------------------------------------------------------------------------------------------------------------------- |
+| `model`             | `string`                 | **Required.** Model ID (see [Models](models.md)).                                                                              |
+| `messages`          | `Message[]`              | **Required.** Conversation history.                                                                                            |
+| `temperature`       | `number`                 | Sampling temperature (0–2).                                                                                                   |
+| `top_p`             | `number`                 | Nucleus sampling.                                                                                                              |
+| `max_tokens`        | `number`                 | Maximum tokens to generate.                                                                                                    |
+| `stop`              | `string \| string[]`     | Stop sequences.                                                                                                                |
+| `n`                 | `number`                 | Number of completions to generate.                                                                                             |
+| `stream`            | `boolean`                | Ignored server-side (encryption requires full response).                                                                       |
+| `presence_penalty`  | `number`                 | Presence penalty (−2.0–2.0).                                                                                                 |
+| `frequency_penalty` | `number`                 | Frequency penalty (−2.0–2.0).                                                                                                |
+| `logit_bias`        | `Record<string, number>` | Token bias map.                                                                                                                |
+| `user`              | `string`                 | End-user identifier (passed through).                                                                                          |
+| `tools`             | `Tool[]`                 | Tool/function definitions.                                                                                                     |
+| `tool_choice`       | `ToolChoice`             | Tool selection strategy (`"auto"`, `"none"`, `"required"`, or specific tool).                                                  |
+| `security_tier`     | `string`                 | NOMYO-specific. `"standard"` \| `"high"` \| `"maximum"`. Not encrypted into the payload.                                       |
+| `api_key`           | `string`                 | NOMYO-specific. Per-request API key override. Not encrypted into the payload.                                                  |
+| `base_url`          | `string`                 | NOMYO-specific. Per-request router URL override. Creates a temporary client for this one call. Not encrypted into the payload. |
+
+**Response shape (`ChatCompletionResponse`):**
+
+```typescript
+{
+  id: string;
+  object: 'chat.completion';
+  created: number;
+  model: string;
+  choices: Array<{
+    index: number;
+    message: {
+      role: string;
+      content: string;
+      tool_calls?: ToolCall[];       // present if tools were invoked
+      reasoning_content?: string;   // chain-of-thought (Qwen3, DeepSeek-R1, etc.)
+    };
+    finish_reason: 'stop' | 'length' | 'tool_calls' | 'content_filter' | null;
+  }>;
+  usage?: {
+    prompt_tokens: number;
+    completion_tokens: number;
+    total_tokens: number;
+  };
+  _metadata?: {
+    payload_id: string;           // echoes the X-Payload-ID sent with the request
+    processed_at: number;         // Unix timestamp of server-side processing
+    is_encrypted: boolean;        // always true for this endpoint
+    encryption_algorithm: string; // e.g. "hybrid-aes256-rsa4096"
+    response_status: string;      // "success" on success
+    security_tier?: string;       // active tier used by the server
+    memory_protection?: {
+      platform: string;
+      memory_locking: boolean;
+      secure_zeroing: boolean;
+      core_dump_prevention: boolean;
+    };
+    cuda_device?: {
+      available: boolean;
+      device_hash: string;        // SHA-256 of device name (not the raw name)
+    };
+  };
+}
+```
+
+#### `acreate(request): Promise<ChatCompletionResponse>`
+
+Alias for `create()`. Provided for code that follows the OpenAI SDK naming convention.
+
+#### `dispose(): void`
+
+Stop the key-rotation timer and sever in-memory RSA key references so they can be garbage-collected. After calling `dispose()`, all methods throw `DisposedError`.
+
+```javascript
+client.dispose();
+```
+
+---
+
+## `SecureCompletionClient`
+
+Lower-level client that exposes key management and individual encryption/decryption operations.
+Use this when you need fine-grained control; for most use cases prefer `SecureChatCompletion`.
+
+### Constructor
+
+```typescript
+new SecureCompletionClient(config?: ClientConfig)
+```
+
+#### `ClientConfig`
+
+All options from `ChatCompletionConfig` except `baseUrl` (which is named `routerUrl` at this level), plus:
+
+
+| Option      | Type          | Default                        | Description                                                   |
+| ------------- | --------------- | -------------------------------- | --------------------------------------------------------------- |
+| `routerUrl` | `string`      | `'https://api.nomyo.ai'`       | NOMYO router base URL.                                        |
+| `keySize`   | `2048 \| 4096` | `4096`                         | RSA modulus length. 2048 is accepted but 4096 is recommended. |
+
+(`baseUrl` is renamed to `routerUrl` at this level; all other options are identical.)
+
+### Methods
+
+#### `generateKeys(options?): Promise<void>`
+
+Generate a fresh RSA key pair.
+
+```typescript
+await client.generateKeys({
+  keySize?: 2048 | 4096,     // default: 4096
+  saveToFile?: boolean,      // default: false
+  keyDir?: string,           // default: 'client_keys'
+  password?: string,         // minimum 8 characters if provided
+});
+```
+
+#### `loadKeys(privateKeyPath, publicKeyPath?, password?): Promise<void>`
+
+Load an existing key pair from PEM files. Node.js only.
+
+```typescript
+await client.loadKeys(
+  'client_keys/private_key.pem',
+  'client_keys/public_key.pem',  // optional; derived from private key path if omitted
+  'your-password'                // required if private key is encrypted
+);
+```
+
+#### `fetchServerPublicKey(): Promise<string>`
+
+Fetch the server's RSA public key from `/pki/public_key` over HTTPS. Called automatically on every encryption; exposed for diagnostics.
+
+#### `encryptPayload(payload): Promise<ArrayBuffer>`
+
+Encrypt a request payload. Returns the encrypted binary package ready to POST.
+
+#### `decryptResponse(encrypted, payloadId): Promise<object>`
+
+Decrypt a response body received from the secure endpoint.
+
+#### `sendSecureRequest(payload, payloadId, apiKey?, securityTier?): Promise<object>`
+
+Full encrypt → POST → decrypt cycle with retry logic. Called internally by `SecureChatCompletion.create()`.
+
+#### `dispose(): void`
+
+Same as `SecureChatCompletion.dispose()`.
+
+---
+
+## Secure Memory API
+
+```typescript
+import {
+  getMemoryProtectionInfo,
+  disableSecureMemory,
+  enableSecureMemory,
+  SecureByteContext,
+} from 'nomyo-js';
+```
+
+### `getMemoryProtectionInfo(): ProtectionInfo`
+
+Returns information about the memory protection available on the current platform:
+
+```typescript
+interface ProtectionInfo {
+  canLock: boolean;       // true if mlock is available (requires native addon)
+  isPlatformSecure: boolean;
+  method: 'mlock' | 'zero-only' | 'none';
+  details?: string;
+}
+```
+
+### `disableSecureMemory(): void`
+
+Disable secure-memory zeroing globally. Affects new `SecureByteContext` instances that do not pass an explicit `useSecure` argument. Existing client instances are unaffected (they pass `useSecure` explicitly).
+
+### `enableSecureMemory(): void`
+
+Re-enable secure memory operations globally.
+
+### `SecureByteContext`
+
+Low-level context manager that zeros an `ArrayBuffer` in a `finally` block even if an exception occurs. Analogous to Python's `secure_bytearray()` context manager.
+
+```typescript
+const context = new SecureByteContext(sensitiveBuffer);
+const result = await context.use(async (data) => {
+  return doSomethingWith(data);
+});
+// sensitiveBuffer is zeroed here regardless of whether doSomethingWith threw
+```
+
+---
+
+## Error Classes
+
+All errors are exported from the package root.
+
+```typescript
+import {
+  APIError,
+  AuthenticationError,
+  InvalidRequestError,
+  RateLimitError,
+  ForbiddenError,
+  ServerError,
+  ServiceUnavailableError,
+  APIConnectionError,
+  SecurityError,
+  DisposedError,
+} from 'nomyo-js';
+```
+
+
+| Class                     | HTTP status | Thrown when                                                  |
+| --------------------------- | ------------- | -------------------------------------------------------------- |
+| `AuthenticationError`     | 401         | Invalid or missing API key                                   |
+| `InvalidRequestError`     | 400         | Malformed request (e.g. streaming requested)                 |
+| `ForbiddenError`          | 403         | Model not allowed for the requested security tier            |
+| `RateLimitError`          | 429         | Rate limit exceeded (after all retries exhausted)            |
+| `ServerError`             | 500         | Internal server error (after all retries exhausted)          |
+| `ServiceUnavailableError` | 503         | Inference backend unavailable (after all retries exhausted)  |
+| `APIError`                | varies      | Other HTTP errors (404, 502, 504, etc.)                      |
+| `APIConnectionError`      | —          | Network failure or timeout (after all retries exhausted)     |
+| `SecurityError`           | —          | HTTPS not used, header injection detected, or crypto failure |
+| `DisposedError`           | —          | Method called after `dispose()`                              |
+
+All errors that extend `APIError` expose `statusCode?: number` and `errorDetails?: object`.
--- a/doc/examples.md
+++ b/doc/examples.md
@ -0,0 +1,437 @@
+# Examples
+
+## Basic Usage
+
+### Simple Chat
+
+```javascript
+import { SecureChatCompletion } from 'nomyo-js';
+
+const client = new SecureChatCompletion({ apiKey: process.env.NOMYO_API_KEY });
+
+const response = await client.create({
+  model: 'Qwen/Qwen3-0.6B',
+  messages: [{ role: 'user', content: 'Hello, how are you?' }],
+  temperature: 0.7,
+});
+
+console.log(response.choices[0].message.content);
+client.dispose();
+```
+
+### Chat with System Message
+
+```javascript
+const response = await client.create({
+  model: 'Qwen/Qwen3-0.6B',
+  messages: [
+    { role: 'system', content: 'You are a concise technical assistant.' },
+    { role: 'user',   content: 'What is the capital of France?' },
+  ],
+  temperature: 0.7,
+});
+
+console.log(response.choices[0].message.content);
+```
+
+---
+
+## Security Tiers
+
+```javascript
+// Standard — general use (GPU)
+const r1 = await client.create({
+  model: 'Qwen/Qwen3-0.6B',
+  messages: [{ role: 'user', content: 'General query' }],
+  security_tier: 'standard',
+});
+
+// High — sensitive business data
+const r2 = await client.create({
+  model: 'Qwen/Qwen3-0.6B',
+  messages: [{ role: 'user', content: 'Review this contract clause...' }],
+  security_tier: 'high',
+});
+
+// Maximum — HIPAA PHI / classified data (CPU-only)
+const r3 = await client.create({
+  model: 'Qwen/Qwen3-0.6B',
+  messages: [{ role: 'user', content: 'Patient record summary...' }],
+  security_tier: 'maximum',
+});
+```
+
+---
+
+## Tool / Function Calling
+
+```javascript
+const response = await client.create({
+  model: 'Qwen/Qwen3-0.6B',
+  messages: [{ role: 'user', content: "What's the weather in Paris?" }],
+  tools: [
+    {
+      type: 'function',
+      function: {
+        name: 'get_weather',
+        description: 'Get weather information for a location',
+        parameters: {
+          type: 'object',
+          properties: {
+            location: { type: 'string', description: 'City name' },
+          },
+          required: ['location'],
+        },
+      },
+    },
+  ],
+  tool_choice: 'auto',
+});
+
+const message = response.choices[0].message;
+if (message.tool_calls?.length) {
+  const call = message.tool_calls[0];
+  const args = JSON.parse(call.function.arguments);
+  console.log(`Call ${call.function.name}(location="${args.location}")`);
+  // → Call get_weather(location="Paris")
+}
+```
+
+---
+
+## Error Handling
+
+```javascript
+import {
+  SecureChatCompletion,
+  AuthenticationError,
+  RateLimitError,
+  ForbiddenError,
+  InvalidRequestError,
+  ServerError,
+  ServiceUnavailableError,
+  APIConnectionError,
+  SecurityError,
+} from 'nomyo-js';
+
+const client = new SecureChatCompletion({ apiKey: process.env.NOMYO_API_KEY });
+
+try {
+  const response = await client.create({
+    model: 'Qwen/Qwen3-0.6B',
+    messages: [{ role: 'user', content: 'Hello' }],
+  });
+  console.log(response.choices[0].message.content);
+
+} catch (err) {
+  if (err instanceof AuthenticationError) {
+    console.error('Check your API key:', err.message);
+  } else if (err instanceof RateLimitError) {
+    console.error('Rate limit hit after all retries:', err.message);
+  } else if (err instanceof ForbiddenError) {
+    console.error('Model not allowed for this security tier:', err.message);
+  } else if (err instanceof InvalidRequestError) {
+    console.error('Bad request:', err.message, err.errorDetails);
+  } else if (err instanceof ServerError || err instanceof ServiceUnavailableError) {
+    console.error('Server error after retries:', err.message);
+  } else if (err instanceof APIConnectionError) {
+    console.error('Network error after retries:', err.message);
+  } else if (err instanceof SecurityError) {
+    console.error('Security/crypto failure:', err.message);
+  } else {
+    throw err;
+  }
+}
+```
+
+---
+
+## Real-World Scenarios
+
+### Chat Application with History
+
+```javascript
+import { SecureChatCompletion } from 'nomyo-js';
+
+class SecureChatApp {
+  constructor(apiKey) {
+    this.client = new SecureChatCompletion({ apiKey });
+    this.history = [];
+  }
+
+  async chat(userMessage) {
+    this.history.push({ role: 'user', content: userMessage });
+
+    const response = await this.client.create({
+      model: 'Qwen/Qwen3-0.6B',
+      messages: this.history,
+      temperature: 0.7,
+    });
+
+    const assistantMessage = response.choices[0].message;
+    this.history.push({ role: assistantMessage.role, content: assistantMessage.content });
+    return assistantMessage.content;
+  }
+
+  dispose() {
+    this.client.dispose();
+  }
+}
+
+const app = new SecureChatApp(process.env.NOMYO_API_KEY);
+
+const r1 = await app.chat("What's your name?");
+console.log('Assistant:', r1);
+
+const r2 = await app.chat('What did I just ask you?');
+console.log('Assistant:', r2);
+
+app.dispose();
+```
+
+### Per-Request Base URL Override
+
+For multi-tenant setups or testing against different router instances from a single client:
+
+```javascript
+const client = new SecureChatCompletion({
+  baseUrl: 'https://primary.nomyo.ai:12435',
+  apiKey: process.env.NOMYO_API_KEY,
+});
+
+// This single request goes to a different router; a temporary client is
+// created, used, and disposed automatically — the main client is unchanged
+const response = await client.create({
+  model: 'Qwen/Qwen3-0.6B',
+  messages: [{ role: 'user', content: 'Hello from secondary router' }],
+  base_url: 'https://secondary.nomyo.ai:12435',
+});
+```
+
+### Environment-Based Configuration
+
+```javascript
+import 'dotenv/config';
+import { SecureChatCompletion } from 'nomyo-js';
+
+const client = new SecureChatCompletion({
+  baseUrl:    process.env.NOMYO_SERVER_URL ?? 'https://api.nomyo.ai',
+  apiKey:     process.env.NOMYO_API_KEY,
+  keyDir:     process.env.NOMYO_KEY_DIR   ?? 'client_keys',
+  maxRetries: Number(process.env.NOMYO_MAX_RETRIES ?? '2'),
+  debug:      process.env.NODE_ENV === 'development',
+});
+```
+
+---
+
+## Batch Processing
+
+### Sequential (Rate-Limit-Safe)
+
+```javascript
+const queries = [
+  'Summarise document A',
+  'Summarise document B',
+  'Summarise document C',
+];
+
+const summaries = [];
+for (const query of queries) {
+  const response = await client.create({
+    model: 'Qwen/Qwen3-0.6B',
+    messages: [{ role: 'user', content: query }],
+  });
+  summaries.push(response.choices[0].message.content);
+  // Optional: add a small delay to stay within rate limits
+  await new Promise(r => setTimeout(r, 600));
+}
+```
+
+### Concurrent (With Throttling)
+
+```javascript
+// Process in batches of 2 (the default rate limit)
+async function batchN(items, batchSize, fn) {
+  const results = [];
+  for (let i = 0; i < items.length; i += batchSize) {
+    const batch = items.slice(i, i + batchSize);
+    const batchResults = await Promise.all(batch.map(fn));
+    results.push(...batchResults);
+    if (i + batchSize < items.length) {
+      await new Promise(r => setTimeout(r, 1100)); // wait >1 s between batches
+    }
+  }
+  return results;
+}
+
+const summaries = await batchN(documents, 2, async (doc) => {
+  const response = await client.create({
+    model: 'Qwen/Qwen3-0.6B',
+    messages: [{ role: 'user', content: `Summarise: ${doc}` }],
+  });
+  return response.choices[0].message.content;
+});
+```
+
+---
+
+## Thinking Models
+
+```javascript
+const response = await client.create({
+  model: 'LiquidAI/LFM2.5-1.2B-Thinking',
+  messages: [{ role: 'user', content: 'Is 9.9 larger than 9.11?' }],
+});
+
+const { content, reasoning_content } = response.choices[0].message;
+console.log('Reasoning:', reasoning_content);   // internal chain-of-thought
+console.log('Answer:',    content);             // final answer to the user
+```
+
+---
+
+## Browser Usage
+
+```html
+<!DOCTYPE html>
+<html>
+<head>
+  <title>NOMYO Secure Chat</title>
+</head>
+<body>
+  <textarea id="input" placeholder="Ask something..."></textarea>
+  <button id="send">Send</button>
+  <div id="output"></div>
+
+  <script type="module">
+    import { SecureChatCompletion } from 'https://unpkg.com/nomyo-js/dist/browser/index.js';
+
+    // In production, proxy through your backend instead of exposing the API key
+    const client = new SecureChatCompletion({
+      baseUrl: 'https://api.nomyo.ai',
+      apiKey:  'your-api-key',        // see note above
+    });
+
+    document.getElementById('send').addEventListener('click', async () => {
+      const text = document.getElementById('input').value.trim();
+      if (!text) return;
+
+      document.getElementById('output').textContent = 'Thinking...';
+      try {
+        const response = await client.create({
+          model: 'Qwen/Qwen3-0.6B',
+          messages: [{ role: 'user', content: text }],
+        });
+        document.getElementById('output').textContent =
+          response.choices[0].message.content;
+      } catch (err) {
+        document.getElementById('output').textContent = `Error: ${err.message}`;
+      }
+    });
+  </script>
+</body>
+</html>
+```
+
+---
+
+## Advanced Key Management
+
+### Custom Key Directory
+
+```javascript
+const client = new SecureChatCompletion({
+  apiKey:  process.env.NOMYO_API_KEY,
+  keyDir:  '/var/lib/myapp/nomyo-keys',   // outside project directory
+  keyRotationDir:      '/var/lib/myapp/nomyo-keys',
+  keyRotationPassword: process.env.NOMYO_KEY_PASSWORD,
+});
+```
+
+### Generating Keys Manually
+
+```javascript
+import { SecureCompletionClient } from 'nomyo-js';
+
+const client = new SecureCompletionClient({
+  routerUrl: 'https://api.nomyo.ai',
+});
+
+// Generate a new 4096-bit key pair and save it with password protection
+await client.generateKeys({
+  saveToFile: true,
+  keyDir:     'client_keys',
+  password:   process.env.NOMYO_KEY_PASSWORD,
+});
+
+console.log('Keys generated and saved to client_keys/');
+client.dispose();
+```
+
+### Loading Keys Explicitly
+
+```javascript
+import { randomUUID } from 'node:crypto';
+import { SecureCompletionClient } from 'nomyo-js';
+
+const client = new SecureCompletionClient({ routerUrl: 'https://api.nomyo.ai' });
+
+await client.loadKeys(
+  'client_keys/private_key.pem',
+  'client_keys/public_key.pem',
+  process.env.NOMYO_KEY_PASSWORD
+);
+
+// Now send requests using the loaded keys
+const result = await client.sendSecureRequest(
+  { model: 'Qwen/Qwen3-0.6B', messages: [{ role: 'user', content: 'Hello' }] },
+  randomUUID()
+);
+client.dispose();
+```
+
+---
+
+## Inspecting Memory Protection
+
+```javascript
+import { getMemoryProtectionInfo } from 'nomyo-js';
+
+const info = getMemoryProtectionInfo();
+
+console.log(`Memory method: ${info.method}`);        // 'zero-only' or 'mlock'
+console.log(`Can lock:      ${info.canLock}`);       // true if native addon present
+console.log(`Details:       ${info.details}`);
+```
+
+---
+
+## TypeScript
+
+Full type safety out of the box:
+
+```typescript
+import {
+  SecureChatCompletion,
+  ChatCompletionRequest,
+  ChatCompletionResponse,
+  Message,
+} from 'nomyo-js';
+
+const client = new SecureChatCompletion({ apiKey: process.env.NOMYO_API_KEY! });
+
+const messages: Message[] = [
+  { role: 'user', content: 'Hello!' },
+];
+
+const request: ChatCompletionRequest = {
+  model: 'Qwen/Qwen3-0.6B',
+  messages,
+  temperature: 0.7,
+};
+
+const response: ChatCompletionResponse = await client.create(request);
+const content = response.choices[0].message.content;
+
+client.dispose();
+```
--- a/doc/getting-started.md
+++ b/doc/getting-started.md
@ -0,0 +1,279 @@
+# Getting Started
+
+## Overview
+
+NOMYO.js provides end-to-end encryption for all communication between your application and NOMYO inference endpoints. Your prompts and responses are encrypted before leaving your process and decrypted only after they arrive back — the server never sees plaintext.
+
+The API mirrors OpenAI's `ChatCompletion`, making it easy to integrate into existing code.
+
+> **Note on streaming:** The API is non-streaming. `stream: true` is accepted in a request but ignored server-side, because encryption requires the full response.
+
+---
+
+## Simple Chat Completion
+
+```javascript
+import { SecureChatCompletion } from 'nomyo-js';
+
+const client = new SecureChatCompletion({
+  apiKey: process.env.NOMYO_API_KEY,
+});
+
+const response = await client.create({
+  model: 'Qwen/Qwen3-0.6B',
+  messages: [{ role: 'user', content: 'Hello! How are you today?' }],
+  temperature: 0.7,
+});
+
+// Extract what you need, then let the response go out of scope promptly.
+// This minimises the time decrypted data lives in process memory
+// (reduces exposure from swap files, core dumps, or memory inspection).
+const reply = response.choices[0].message.content;
+console.log(reply);
+```
+
+### With a System Message
+
+```javascript
+const response = await client.create({
+  model: 'Qwen/Qwen3-0.6B',
+  messages: [
+    { role: 'system', content: 'You are a helpful assistant.' },
+    { role: 'user',   content: 'What is the capital of France?' },
+  ],
+  temperature: 0.7,
+});
+
+console.log(response.choices[0].message.content);
+```
+
+---
+
+## API Key Authentication
+
+```javascript
+// Constructor-level key (used for all requests from this instance)
+const client = new SecureChatCompletion({
+  apiKey: process.env.NOMYO_API_KEY,
+});
+
+// Per-request key override (takes precedence over constructor key)
+const response = await client.create({
+  model: 'Qwen/Qwen3-0.6B',
+  messages: [{ role: 'user', content: 'Hello!' }],
+  api_key: 'override-key-for-this-request',
+});
+```
+
+---
+
+## Security Tiers
+
+Pass `security_tier` in the request to control hardware routing and isolation level:
+
+
+| Tier         | Use case                                              |
+| -------------- | ------------------------------------------------------- |
+| `"standard"` | General secure inference (GPU)                        |
+| `"high"`     | Sensitive business data — enforces secure tokenizer  |
+| `"maximum"`  | HIPAA PHI, classified data — E2EE, maximum isolation |
+
+```javascript
+// Standard — general use
+const response = await client.create({
+  model: 'Qwen/Qwen3-0.6B',
+  messages: [{ role: 'user', content: 'General query' }],
+  security_tier: 'standard',
+});
+
+// High — sensitive business data
+const response2 = await client.create({
+  model: 'Qwen/Qwen3-0.6B',
+  messages: [{ role: 'user', content: 'Summarise this contract clause...' }],
+  security_tier: 'high',
+});
+
+// Maximum — PHI / classified data
+const response3 = await client.create({
+  model: 'Qwen/Qwen3-0.6B',
+  messages: [{ role: 'user', content: 'Patient record summary...' }],
+  security_tier: 'maximum',
+});
+```
+
+> Using `"high"` or `"maximum"` adds latency vs `"standard"` due to additional isolation measures.
+
+---
+
+## Using Tools (Function Calling)
+
+```javascript
+const response = await client.create({
+  model: 'Qwen/Qwen3-0.6B',
+  messages: [{ role: 'user', content: "What's the weather in Paris?" }],
+  tools: [
+    {
+      type: 'function',
+      function: {
+        name: 'get_weather',
+        description: 'Get weather information for a location',
+        parameters: {
+          type: 'object',
+          properties: {
+            location: { type: 'string', description: 'City name' },
+          },
+          required: ['location'],
+        },
+      },
+    },
+  ],
+  tool_choice: 'auto',
+  temperature: 0.7,
+});
+
+const message = response.choices[0].message;
+if (message.tool_calls) {
+  const call = message.tool_calls[0];
+  console.log('Tool called:', call.function.name);
+  console.log('Arguments:', call.function.arguments);
+}
+```
+
+---
+
+## Error Handling
+
+Import typed error classes to distinguish failure modes:
+
+```javascript
+import {
+  SecureChatCompletion,
+  AuthenticationError,
+  RateLimitError,
+  InvalidRequestError,
+  ForbiddenError,
+  ServerError,
+  ServiceUnavailableError,
+  APIConnectionError,
+  SecurityError,
+} from 'nomyo-js';
+
+const client = new SecureChatCompletion({ apiKey: process.env.NOMYO_API_KEY });
+
+try {
+  const response = await client.create({
+    model: 'Qwen/Qwen3-0.6B',
+    messages: [{ role: 'user', content: 'Hello!' }],
+  });
+  console.log(response.choices[0].message.content);
+
+} catch (err) {
+  if (err instanceof AuthenticationError) {
+    console.error('Bad API key:', err.message);
+
+  } else if (err instanceof RateLimitError) {
+    // The client already retried automatically (default: 2 retries).
+    // If you reach here, all retries were exhausted.
+    console.error('Rate limit exceeded after retries:', err.message);
+
+  } else if (err instanceof ForbiddenError) {
+    // Model not allowed for the requested security_tier
+    console.error('Forbidden:', err.message);
+
+  } else if (err instanceof InvalidRequestError) {
+    console.error('Bad request:', err.message);
+
+  } else if (err instanceof ServerError || err instanceof ServiceUnavailableError) {
+    console.error('Server error (retries exhausted):', err.message);
+
+  } else if (err instanceof APIConnectionError) {
+    console.error('Network error (retries exhausted):', err.message);
+
+  } else if (err instanceof SecurityError) {
+    console.error('Encryption/decryption failure:', err.message);
+
+  } else {
+    throw err;   // re-throw unexpected errors
+  }
+}
+```
+
+All typed errors expose:
+
+- `message: string` — human-readable description
+- `statusCode?: number` — HTTP status (where applicable)
+- `errorDetails?: object` — raw response body (where applicable)
+
+---
+
+## Resource Management
+
+Always call `dispose()` when you're done with a client to stop the background key-rotation timer and zero in-memory key material:
+
+```javascript
+const client = new SecureChatCompletion({ apiKey: process.env.NOMYO_API_KEY });
+
+try {
+  const response = await client.create({ ... });
+  console.log(response.choices[0].message.content);
+} finally {
+  client.dispose();
+}
+```
+
+For long-running servers (HTTP handlers, daemons), create one client instance and reuse it — don't create a new one per request.
+
+---
+
+## `acreate()` Alias
+
+`acreate()` is a direct alias for `create()` provided for code that follows the OpenAI naming convention:
+
+```javascript
+const response = await client.acreate({
+  model: 'Qwen/Qwen3-0.6B',
+  messages: [{ role: 'user', content: 'Hello!' }],
+});
+```
+
+---
+
+## Browser Usage
+
+In browsers, keys are kept in memory only (no file system). Everything else is identical to Node.js.
+
+```html
+<script type="module">
+  import { SecureChatCompletion } from 'https://unpkg.com/nomyo-js/dist/browser/index.js';
+
+  const client = new SecureChatCompletion({
+    baseUrl: 'https://api.nomyo.ai',
+    apiKey: 'your-api-key',
+  });
+
+  const response = await client.create({
+    model: 'Qwen/Qwen3-0.6B',
+    messages: [{ role: 'user', content: 'Hello from the browser!' }],
+  });
+
+  console.log(response.choices[0].message.content);
+  client.dispose();
+</script>
+```
+
+> **Security note:** Embedding API keys in browser-side code exposes them to end users. In a real application, proxy requests through your backend or use short-lived tokens.
+
+---
+
+## Local Development (HTTP)
+
+For a local NOMYO router running over plain HTTP:
+
+```javascript
+const client = new SecureChatCompletion({
+  baseUrl: 'http://localhost:12435',
+  allowHttp: true,   // required; prints a security warning
+});
+```
+
+Never use `allowHttp: true` in production.
--- a/doc/installation.md
+++ b/doc/installation.md
@ -0,0 +1,107 @@
+# Installation
+
+## Prerequisites
+
+- **Node.js**: 14.17 or higher (18 LTS recommended)
+- **npm** / **yarn** / **pnpm**
+- For TypeScript projects: TypeScript 4.7+
+
+## Install from npm
+
+```bash
+npm install nomyo-js
+```
+
+```bash
+yarn add nomyo-js
+```
+
+```bash
+pnpm add nomyo-js
+```
+
+## Browser (CDN)
+
+```html
+<script type="module">
+  import { SecureChatCompletion } from 'https://unpkg.com/nomyo-js/dist/browser/index.js';
+  // ...
+</script>
+```
+
+---
+
+## Verify Installation
+
+### Node.js
+
+```javascript
+import { SecureChatCompletion, getMemoryProtectionInfo } from 'nomyo-js';
+
+const info = getMemoryProtectionInfo();
+console.log('Memory protection:', info.method);   // e.g. "zero-only"
+console.log('Can lock:', info.canLock);           // true if native addon present
+
+const client = new SecureChatCompletion({ apiKey: 'test' });
+console.log('nomyo-js installed successfully');
+client.dispose();
+```
+
+### CommonJS
+
+```javascript
+const { SecureChatCompletion } = require('nomyo-js');
+```
+
+## Optional: Native Memory Addon
+
+The pure-JS implementation zeroes buffers immediately after use but cannot prevent the OS from paging sensitive data to swap.
+The optional native addon adds `mlock`/`VirtualLock` support for true OS-level memory locking.
+
+```bash
+cd node_modules/nomyo-js/native
+npm install
+npm run build
+```
+
+Or install the `nomyo-native` package separately:
+
+```bash
+npm install nomyo-native
+```
+
+When the addon is present `getMemoryProtectionInfo()` will report `method: 'mlock'` and `canLock: true`.
+
+## TypeScript
+
+All public APIs ship with bundled type declarations — no `@types/` package required.
+
+```typescript
+import {
+  SecureChatCompletion,
+  ChatCompletionRequest,
+  ChatCompletionResponse,
+  getMemoryProtectionInfo,
+} from 'nomyo-js';
+```
+
+## Environment Variables
+
+Store secrets outside source code:
+
+```bash
+# .env (never commit this file)
+NOMYO_API_KEY=your-api-key
+NOMYO_SERVER_URL=https://api.nomyo.ai
+NOMYO_KEY_PASSWORD=your-key-password
+```
+
+```javascript
+import 'dotenv/config';   // or use process.env directly
+import { SecureChatCompletion } from 'nomyo-js';
+
+const client = new SecureChatCompletion({
+  baseUrl: process.env.NOMYO_SERVER_URL,
+  apiKey:  process.env.NOMYO_API_KEY,
+});
+```
--- a/doc/models.md
+++ b/doc/models.md
@ -0,0 +1,85 @@
+# Available Models
+
+All models are available via `api.nomyo.ai`. Pass the model ID string directly to the `model` field of `create()`.
+
+## Model List
+
+| Model ID | Parameters | Type | Notes |
+|---|---|---|---|
+| `Qwen/Qwen3-0.6B` | 0.6B | General | Lightweight, fast inference |
+| `Qwen/Qwen3.5-0.8B` | 0.8B | General | Lightweight, fast inference |
+| `LiquidAI/LFM2.5-1.2B-Thinking` | 1.2B | Thinking | Reasoning model |
+| `ibm-granite/granite-4.0-h-small` | Small | General | IBM Granite 4.0, enterprise-focused |
+| `Qwen/Qwen3.5-9B` | 9B | General | Balanced quality and speed |
+| `utter-project/EuroLLM-9B-Instruct-2512` | 9B | General | Multilingual, strong European language support |
+| `zai-org/GLM-4.7-Flash` | — | General | Fast GLM variant |
+| `mistralai/Ministral-3-14B-Instruct-2512-GGUF` | 14B | General | Mistral instruction-tuned |
+| `ServiceNow-AI/Apriel-1.6-15b-Thinker` | 15B | Thinking | Reasoning model |
+| `openai/gpt-oss-20b` | 20B | General | OpenAI open-weight release |
+| `LiquidAI/LFM2-24B-A2B` | 24B (2B active) | General | MoE — efficient inference |
+| `Qwen/Qwen3.5-27B` | 27B | General | High quality, large context |
+| `google/medgemma-27b-it` | 27B | Specialized | Medical domain, instruction-tuned |
+| `nvidia/NVIDIA-Nemotron-3-Nano-30B-A3B-NVFP4` | 30B (3B active) | General | MoE — efficient inference |
+| `Qwen/Qwen3.5-35B-A3B` | 35B (3B active) | General | MoE — efficient inference |
+| `moonshotai/Kimi-Linear-48B-A3B-Instruct` | 48B (3B active) | General | MoE — large capacity, efficient inference |
+
+> **MoE** (Mixture of Experts) models show total/active parameter counts. Only active parameters are used per token, keeping inference cost low relative to total model size.
+
+## Usage
+
+```javascript
+import { SecureChatCompletion } from 'nomyo-js';
+
+const client = new SecureChatCompletion({ apiKey: process.env.NOMYO_API_KEY });
+
+const response = await client.create({
+  model: 'Qwen/Qwen3.5-9B',
+  messages: [{ role: 'user', content: 'Hello!' }],
+});
+```
+
+## Choosing a Model
+
+| Goal | Recommended models |
+|------|--------------------|
+| **Low latency / edge** | `Qwen/Qwen3-0.6B`, `Qwen/Qwen3.5-0.8B`, `LiquidAI/LFM2.5-1.2B-Thinking` |
+| **Balanced quality + speed** | `Qwen/Qwen3.5-9B`, `mistralai/Ministral-3-14B-Instruct-2512-GGUF` |
+| **Reasoning / chain-of-thought** | `LiquidAI/LFM2.5-1.2B-Thinking`, `ServiceNow-AI/Apriel-1.6-15b-Thinker` |
+| **Multilingual** | `utter-project/EuroLLM-9B-Instruct-2512` |
+| **Medical** | `google/medgemma-27b-it` |
+| **Highest quality** | `moonshotai/Kimi-Linear-48B-A3B-Instruct`, `Qwen/Qwen3.5-35B-A3B` |
+
+## Thinking Models
+
+Models marked **Thinking** return an additional `reasoning_content` field in the response message alongside the normal `content`. This contains the model's internal chain-of-thought:
+
+```javascript
+const response = await client.create({
+  model: 'LiquidAI/LFM2.5-1.2B-Thinking',
+  messages: [{ role: 'user', content: 'Is 9.9 or 9.11 larger?' }],
+});
+
+const { content, reasoning_content } = response.choices[0].message;
+console.log('Reasoning:', reasoning_content); // internal chain-of-thought
+console.log('Answer:', content);              // final answer
+```
+
+## Security Tier Compatibility
+
+Not all models are available on all security tiers. If a model is not permitted for the requested tier, the server returns HTTP 403 and the client throws `ForbiddenError`.
+
+```javascript
+import { ForbiddenError } from 'nomyo-js';
+
+try {
+  const response = await client.create({
+    model: 'Qwen/Qwen3.5-27B',
+    messages: [{ role: 'user', content: '...' }],
+    security_tier: 'maximum',
+  });
+} catch (err) {
+  if (err instanceof ForbiddenError) {
+    // Model not available at this security tier — retry with a different tier or model
+  }
+}
+```
--- a/doc/rate-limits.md
+++ b/doc/rate-limits.md
@ -0,0 +1,115 @@
+# Rate Limits
+
+The NOMYO API (`api.nomyo.ai`) enforces rate limits to ensure fair usage and service stability for all users.
+
+## Default Rate Limit
+
+By default, each API key is limited to **2 requests per second**.
+
+## Burst Allowance
+
+Short bursts above the default limit are permitted. You may send up to **4 requests per second** in burst mode, provided you have not exceeded burst usage within the current **10-second window**.
+
+Burst capacity is granted once per 10-second window. If you consume the burst allowance, you must wait for the window to reset before burst is available again.
+
+## Rate Limit Summary
+
+| Mode | Limit | Condition |
+|------|-------|-----------|
+| Default | 2 requests/second | Always active |
+| Burst | 4 requests/second | Once per 10-second window |
+
+## Error Responses
+
+### 429 Too Many Requests
+
+Returned when your request rate exceeds the allowed limit.
+
+The client retries automatically (see below). If all retries are exhausted, `RateLimitError` is thrown:
+
+```javascript
+import { SecureChatCompletion, RateLimitError } from 'nomyo-js';
+
+try {
+  const response = await client.create({ ... });
+} catch (err) {
+  if (err instanceof RateLimitError) {
+    // All retries exhausted — back off manually before trying again
+    console.error('Rate limit exceeded:', err.message);
+  }
+}
+```
+
+### 503 Service Unavailable (Cool-down)
+
+Returned when burst limits are abused repeatedly. A **30-minute cool-down** is applied to the offending API key.
+
+**What to do:** Wait 30 minutes before retrying. Review your request patterns to ensure you stay within the permitted limits.
+
+## Automatic Retry Behaviour
+
+The client retries automatically on `429`, `500`, `502`, `503`, `504`, and network errors using exponential backoff:
+
+| Attempt | Delay before attempt |
+|---------|----------------------|
+| 1st (initial) | — |
+| 2nd | 1 second |
+| 3rd | 2 seconds |
+
+The default is **2 retries** (3 total attempts). Adjust per client:
+
+```javascript
+// More retries for high-throughput workloads
+const client = new SecureChatCompletion({
+  apiKey: process.env.NOMYO_API_KEY,
+  maxRetries: 5,
+});
+
+// Disable retries entirely (fail fast)
+const client2 = new SecureChatCompletion({
+  apiKey: process.env.NOMYO_API_KEY,
+  maxRetries: 0,
+});
+```
+
+## Best Practices
+
+- **Throttle requests client-side** to stay at or below 2 requests/second under normal load.
+- **Use burst sparingly** — it is intended for occasional spikes, not sustained high-throughput usage.
+- **Increase `maxRetries`** for background jobs that can tolerate extra latency.
+- **Monitor for `503` responses** — repeated occurrences indicate your usage pattern is triggering the abuse threshold.
+- **Parallel requests** (e.g. `Promise.all`) count against the same rate limit — be careful with large batches.
+
+## Batch Processing Example
+
+Throttle parallel requests to stay within the rate limit:
+
+```javascript
+import { SecureChatCompletion } from 'nomyo-js';
+
+const client = new SecureChatCompletion({ apiKey: process.env.NOMYO_API_KEY });
+
+async function throttledBatch(queries, requestsPerSecond = 2) {
+  const results = [];
+  const delayMs = 1000 / requestsPerSecond;
+
+  for (const query of queries) {
+    const start = Date.now();
+
+    const response = await client.create({
+      model: 'Qwen/Qwen3-0.6B',
+      messages: [{ role: 'user', content: query }],
+    });
+    results.push(response.choices[0].message.content);
+
+    // Throttle: wait for the remainder of the time slot
+    const elapsed = Date.now() - start;
+    if (elapsed < delayMs) {
+      await new Promise(resolve => setTimeout(resolve, delayMs - elapsed));
+    }
+  }
+
+  client.dispose();
+  return results;
+}
+```
--- a/doc/security-guide.md
+++ b/doc/security-guide.md
@ -0,0 +1,237 @@
+# Security Guide
+
+## Overview
+
+NOMYO.js provides end-to-end encryption for all communication between your application and NOMYO inference endpoints. Your prompts and responses are encrypted before leaving your process — the inference server never processes plaintext.
+
+For the full cryptographic architecture and threat model see [SECURITY.md](../docs/SECURITY.md).
+
+---
+
+## Encryption Mechanism
+
+### Hybrid Encryption
+
+Each request uses a two-layer scheme:
+
+1. **AES-256-GCM** encrypts the payload (authenticated encryption — prevents tampering).
+2. **RSA-OAEP-SHA256** wraps the AES key for secure key exchange.
+
+The server holds the RSA private key; your client generates the AES key fresh for every request.
+
+### Per-Request Ephemeral AES Keys
+
+- A new 256-bit AES key is generated for every `create()` call using the Web Crypto API.
+- The key is never reused — forward secrecy is ensured per request.
+- The key is zeroed from memory immediately after encryption.
+
+### Key Exchange
+
+Your client's RSA public key is sent in the `X-Public-Key` request header. The server encrypts the response with it so only your client can decrypt the reply.
+
+---
+
+## Memory Protection
+
+### What the Library Does
+
+All intermediate sensitive buffers (AES key, plaintext payload, decrypted response bytes) are wrapped in `SecureByteContext`. This guarantees they are zeroed in a `finally` block immediately after use, even if an exception occurs.
+
+The encrypted request body (`ArrayBuffer`) is also zeroed by the Node.js HTTP client after the data is handed to the socket.
+
+### Limitations (Pure JavaScript)
+
+JavaScript has no direct access to OS memory management. The library cannot:
+
+- Lock pages to prevent swapping (`mlock` / `VirtualLock`)
+- Prevent the garbage collector from copying data internally
+- Guarantee memory won't appear in core dumps
+
+**Impact:** On a system under memory pressure, sensitive data could briefly be written to swap. For environments where this is unacceptable (PHI, classified), install the optional native addon or run on a system with swap disabled.
+
+### Native Addon (Optional)
+
+The `nomyo-native` addon adds true `mlock` support. When installed, `getMemoryProtectionInfo()` reports `method: 'mlock'` and `canLock: true`:
+
+```javascript
+import { getMemoryProtectionInfo } from 'nomyo-js';
+
+const info = getMemoryProtectionInfo();
+// Without addon: { method: 'zero-only', canLock: false }
+// With addon:    { method: 'mlock',     canLock: true  }
+```
+
+---
+
+## Minimise Response Lifetime
+
+The library protects all intermediate crypto material in secure memory. However, the **final parsed response object** is returned to your code, and you are responsible for how long it lives.
+
+```javascript
+// GOOD — extract what you need, then drop the response immediately
+const response = await client.create({
+  model: 'Qwen/Qwen3.5-9B',
+  messages: [{ role: 'user', content: 'Summarise patient record #1234' }],
+  security_tier: 'maximum',
+});
+const reply = response.choices[0].message.content;
+// Let response go out of scope here — don't hold it in a variable
+// longer than necessary
+
+// BAD — holding the full response object in a long-lived scope
+this.lastResponse = response;  // stored for minutes / hours
+```
+
+JavaScript's `delete` and variable reassignment do not zero the underlying memory. For sensitive data (PHI, classified), process and discard as quickly as possible — do not store it in instance properties, global caches, or log files.
+
+---
+
+## Key Management
+
+### Default Behaviour
+
+Keys are automatically generated on first use and saved to `client_keys/` (Node.js). On subsequent runs the saved keys are reloaded automatically.
+
+```
+client_keys/
+  private_key.pem    # permissions 0600 (owner-only)
+  public_key.pem     # permissions 0644
+```
+
+### Configure the Key Directory
+
+```javascript
+const client = new SecureChatCompletion({
+  apiKey: process.env.NOMYO_API_KEY,
+  keyDir: '/etc/myapp/nomyo-keys',  // custom path, outside project directory
+});
+```
+
+### Password-Protected Keys (Recommended for Production)
+
+Protect key files with a password so they cannot be used even if the file is leaked:
+
+```javascript
+import { SecureCompletionClient } from 'nomyo-js';
+
+const client = new SecureCompletionClient({ routerUrl: 'https://api.nomyo.ai' });
+
+await client.generateKeys({
+  saveToFile: true,
+  keyDir: 'client_keys',
+  password: process.env.NOMYO_KEY_PASSWORD,  // minimum 8 characters
+});
+```
+
+To load password-protected keys manually:
+
+```javascript
+await client.loadKeys(
+  'client_keys/private_key.pem',
+  'client_keys/public_key.pem',
+  process.env.NOMYO_KEY_PASSWORD
+);
+```
+
+### Key Rotation
+
+Keys rotate automatically every 24 hours by default. Configure or disable:
+
+```javascript
+const client = new SecureChatCompletion({
+  apiKey: process.env.NOMYO_API_KEY,
+  keyRotationInterval: 3600000,              // rotate every hour
+  keyRotationDir: '/var/lib/myapp/keys',
+  keyRotationPassword: process.env.KEY_PWD,
+});
+
+// Or disable entirely for short-lived processes
+const client2 = new SecureChatCompletion({
+  apiKey: process.env.NOMYO_API_KEY,
+  keyRotationInterval: 0,
+});
+```
+
+### File Permissions
+
+Private key files are saved with `0600` permissions (owner read/write only) on Unix-like systems. Add `client_keys/` and `*.pem` to your `.gitignore` — both are already included if you use this package's default `.gitignore`.
+
+---
+
+## Security Tiers
+
+| Tier | Hardware | Use case |
+|------|----------|----------|
+| `"standard"` | GPU | General secure inference |
+| `"high"` | CPU/GPU balanced | Sensitive business data, enforces secure tokenizer |
+| `"maximum"` | CPU only | HIPAA PHI, classified data — maximum isolation |
+
+Higher tiers add round-trip latency but increase hardware-level isolation.
+
+---
+
+## HTTPS Enforcement
+
+The client enforces HTTPS by default. HTTP connections require explicit opt-in and print a visible warning:
+
+```javascript
+// Production — HTTPS only (default)
+const client = new SecureChatCompletion({ baseUrl: 'https://api.nomyo.ai' });
+
+// Local development — HTTP allowed with explicit flag
+const devClient = new SecureChatCompletion({
+  baseUrl: 'http://localhost:12435',
+  allowHttp: true,   // prints: "WARNING: Using HTTP instead of HTTPS..."
+});
+```
+
+Without `allowHttp: true`, connecting over HTTP throws `SecurityError`.
+
+The server's public key is fetched over HTTPS with TLS certificate verification to prevent man-in-the-middle attacks.
+
+---
+
+## API Key Security
+
+API keys are sent as `Bearer` tokens in the `Authorization` header. The client validates that the key does not contain CR or LF characters to prevent HTTP header injection.
+
+Never hardcode API keys in source code — use environment variables:
+
+```javascript
+const client = new SecureChatCompletion({
+  apiKey: process.env.NOMYO_API_KEY,
+});
+```
+
+---
+
+## Production Checklist
+
+- [ ] Always use HTTPS (`allowHttp` is `false` by default)
+- [ ] Load API key from environment variable, not hardcoded
+- [ ] Enable `secureMemory: true` (default)
+- [ ] Use password-protected key files (`keyRotationPassword`)
+- [ ] Store keys outside the project directory and outside version control
+- [ ] Add `client_keys/` and `*.pem` to `.gitignore`
+- [ ] Call `client.dispose()` when the client is no longer needed
+- [ ] Consider the native addon if swap-file exposure is unacceptable
+
+---
+
+## Compliance Considerations
+
+### HIPAA
+
+For Protected Health Information (PHI):
+- Use `security_tier: 'maximum'` on requests containing PHI
+- Enable password-protected key files
+- Ensure HTTPS is enforced (the default)
+- Minimise response lifetime in memory (extract, use, discard)
+
+### Data Classification
+
+| Classification | Recommended tier |
+|---------------|-----------------|
+| Public / internal | `"standard"` |
+| Confidential business data | `"high"` |
+| PHI, PII, classified | `"maximum"` |
--- a/doc/troubleshooting.md
+++ b/doc/troubleshooting.md
@ -0,0 +1,314 @@
+# Troubleshooting
+
+## Authentication Errors
+
+### `AuthenticationError: Invalid or missing API key`
+
+The server rejected your API key.
+
+**Causes and fixes:**
+
+- Key not set — pass `apiKey` to the constructor or use `process.env.NOMYO_API_KEY`.
+- Key has leading/trailing whitespace — check the value with `console.log(JSON.stringify(process.env.NOMYO_API_KEY))`.
+- Key contains CR or LF characters — the client rejects keys with `\r` or `\n` and throws `SecurityError` before the request is sent. Regenerate the key.
+
+```javascript
+const client = new SecureChatCompletion({
+  apiKey: process.env.NOMYO_API_KEY,  // never hardcode
+});
+```
+
+---
+
+## Connection Errors
+
+### `APIConnectionError: Network error` / `connect ECONNREFUSED`
+
+The client could not reach the router.
+
+**Check:**
+
+1. `baseUrl` is correct — the default is `https://api.nomyo.ai` (standard HTTPS port **443**; no explicit port is needed).
+2. You have network access to the host.
+3. TLS is not being blocked by a proxy or firewall.
+
+### `SecurityError: HTTPS is required`
+
+You passed an `http://` URL without setting `allowHttp: true`.
+
+```javascript
+// Local dev only
+const client = new SecureChatCompletion({
+  baseUrl: 'http://localhost:12435',
+  allowHttp: true,
+});
+```
+
+Never set `allowHttp: true` in production — the server public key fetch and all request data would travel unencrypted.
+
+### `APIConnectionError: Request timed out`
+
+The default timeout is 60 seconds. Larger models or busy endpoints may need more:
+
+```javascript
+const client = new SecureChatCompletion({
+  apiKey: process.env.NOMYO_API_KEY,
+  timeout: 120000,  // 2 minutes
+});
+```
+
+---
+
+## Key Loading Failures
+
+### `Error: Failed to load keys: no such file or directory`
+
+The `keyDir` directory or the PEM files inside it don't exist. On first run the library generates and saves a new key pair automatically. If you specified a custom `keyDir`, make sure the directory is writable:
+
+```javascript
+const client = new SecureChatCompletion({
+  apiKey: process.env.NOMYO_API_KEY,
+  keyDir: '/var/lib/myapp/nomyo-keys',  // directory must exist and be writable
+});
+```
+
+### `Error: Invalid passphrase` / `Error: Failed to decrypt private key`
+
+The password you passed to `loadKeys()` or `keyRotationPassword` doesn't match what was used to encrypt the file.
+
+```javascript
+await client.loadKeys(
+  'client_keys/private_key.pem',
+  'client_keys/public_key.pem',
+  process.env.NOMYO_KEY_PASSWORD,  // must match the password used on generateKeys()
+);
+```
+
+### `Error: RSA key too small`
+
+The library enforces a minimum key size of 2048 bits. If you have old 1024-bit keys, regenerate them:
+
+```javascript
+await client.generateKeys({
+  saveToFile: true,
+  keyDir: 'client_keys',
+  keySize: 4096,  // recommended
+});
+```
+
+### `Error: Failed to load keys` (browser)
+
+Key loading from files is a Node.js-only feature. In browsers, keys are generated in memory on first use. Do not call `loadKeys()` in a browser context.
+
+---
+
+## Rate Limit Errors
+
+### `RateLimitError: Rate limit exceeded`
+
+All automatic retries were exhausted. The default limit is 2 requests/second; burst allows 4 requests/second once per 10-second window.
+
+**Fixes:**
+
+- Reduce concurrency — avoid large `Promise.all` batches.
+- Add client-side throttling (see [Rate Limits](rate-limits.md)).
+- Increase `maxRetries` so the client backs off longer before giving up:
+
+```javascript
+const client = new SecureChatCompletion({
+  apiKey: process.env.NOMYO_API_KEY,
+  maxRetries: 5,
+});
+```
+
+### `ServiceUnavailableError` with 30-minute cool-down
+
+Burst limits were hit repeatedly and a cool-down was applied to your key. Wait 30 minutes, then review your request patterns.
+
+---
+
+## Model / Tier Errors
+
+### `ForbiddenError: Model not allowed for this security tier`
+
+The model you requested is not available at the security tier you specified. Try a lower tier or a different model:
+
+```javascript
+// If 'maximum' tier rejects the model, try 'high' or 'standard'
+const response = await client.create({
+  model: 'Qwen/Qwen3.5-27B',
+  messages: [...],
+  security_tier: 'high',  // try 'standard' if still rejected
+});
+```
+
+See [Models — Security Tier Compatibility](models.md#security-tier-compatibility) for details.
+
+---
+
+## Crypto / Security Errors
+
+### `SecurityError: Decryption failed`
+
+The response could not be decrypted. This is intentionally vague to avoid leaking crypto details.
+
+**Possible causes:**
+
+- The server returned a malformed response (check `debug: true` output).
+- A network proxy modified the response body.
+- The server's public key changed mid-session — the next request will re-fetch it automatically.
+
+Enable debug mode to log the raw response and narrow the cause:
+
+```javascript
+const client = new SecureChatCompletion({
+  apiKey: process.env.NOMYO_API_KEY,
+  debug: true,
+});
+```
+
+### `Error: Unsupported protocol version` / `Error: Unsupported encryption algorithm`
+
+The server sent a response in a protocol version or with an encryption algorithm not supported by this client version. Update the package:
+
+```bash
+npm update nomyo-js
+```
+
+---
+
+## `DisposedError`: Method called after `dispose()`
+
+You called a method on a client that has already been disposed.
+
+```javascript
+client.dispose();
+await client.create(...);  // throws DisposedError
+```
+
+Create a new client instance if you need to make more requests after disposal.
+
+---
+
+## Memory Protection Warnings
+
+### `getMemoryProtectionInfo()` returns `method: 'zero-only'`
+
+This is normal for a pure JavaScript installation. The library zeroes sensitive buffers immediately after use but cannot lock pages to prevent swapping (OS `mlock` requires a native addon).
+
+```javascript
+import { getMemoryProtectionInfo } from 'nomyo-js';
+
+const info = getMemoryProtectionInfo();
+// { method: 'zero-only', canLock: false, isPlatformSecure: false }
+```
+
+For environments where swap-file exposure is unacceptable (HIPAA PHI, classified data), install the optional `nomyo-native` addon or run on a system with swap disabled.
+
+---
+
+## Node.js-Specific Issues
+
+### `ReferenceError: crypto is not defined`
+
+In CommonJS modules on Node.js before v19, `crypto` is not a global. Import it explicitly:
+
+```javascript
+// CommonJS
+const { webcrypto } = require('crypto');
+global.crypto = webcrypto;
+
+// Or switch to ES modules (recommended)
+// package.json: "type": "module"
+```
+
+The library itself imports `crypto` correctly — this error only appears if your own application code tries to use `crypto` directly.
+
+### `SyntaxError: Cannot use import statement outside a module` / CommonJS vs ESM
+
+The package ships both CommonJS (`dist/node/`) and ESM (`dist/esm/`) builds. Node.js selects the correct one automatically via `package.json` `exports`. If you see import errors, check that your `package.json` or bundler is not forcing the wrong format.
+
+For ESM: set `"type": "module"` in your `package.json` or use `.mjs` file extensions.
+For CommonJS: use `require('nomyo-js')` or `.cjs` extensions.
+
+### TypeScript: `Cannot find module 'nomyo-js'` / missing types
+
+Ensure your `tsconfig.json` includes `"moduleResolution": "bundler"` or `"moduleResolution": "node16"` and that `nomyo-js` is in `dependencies` (not just `devDependencies`):
+
+```bash
+npm install nomyo-js
+```
+
+---
+
+## Browser-Specific Issues
+
+### `Content Security Policy blocked`
+
+If your app's CSP restricts `script-src` or `connect-src`, add the NOMYO API domain:
+
+```
+Content-Security-Policy: connect-src https://api.nomyo.ai;
+```
+
+### `TypeError: Failed to fetch` (CORS)
+
+The NOMYO API includes CORS headers. If you see CORS errors in a browser, verify the `baseUrl` is correct (HTTPS, correct port) and that no browser extension is blocking the request.
+
+### Keys not persisted across page reloads
+
+This is expected behaviour — browsers do not have file system access. Keys are generated fresh on each page load. If you need persistent keys in a browser context, implement your own `loadKeys`/`generateKeys` wrapper using `localStorage` or `IndexedDB` (not recommended for high-security scenarios).
+
+---
+
+## Debugging Tips
+
+### Enable verbose logging
+
+```javascript
+const client = new SecureChatCompletion({
+  apiKey: process.env.NOMYO_API_KEY,
+  debug: true,
+});
+```
+
+Debug mode logs: key generation/loading, server public key fetches, request encryption details, retry attempts, and response decryption.
+
+### Check memory protection status
+
+```javascript
+import { getMemoryProtectionInfo } from 'nomyo-js';
+console.log(getMemoryProtectionInfo());
+```
+
+### Inspect response metadata
+
+The `_metadata` field in every response carries server-side diagnostics:
+
+```javascript
+const response = await client.create({ ... });
+console.log(response._metadata);
+// {
+//   payload_id: '...',
+//   is_encrypted: true,
+//   encryption_algorithm: 'hybrid-aes256-rsa4096',
+//   security_tier: 'standard',
+//   memory_protection: { ... },
+// }
+```
+
+### Test with minimum configuration
+
+Strip all optional configuration and test with the simplest possible call to isolate the issue:
+
+```javascript
+import { SecureChatCompletion } from 'nomyo-js';
+
+const client = new SecureChatCompletion({ apiKey: process.env.NOMYO_API_KEY });
+const r = await client.create({
+  model: 'Qwen/Qwen3-0.6B',
+  messages: [{ role: 'user', content: 'ping' }],
+});
+console.log(r.choices[0].message.content);
+client.dispose();
+```
--- a/docs/SECURITY.md
+++ b/docs/SECURITY.md
@ -143,7 +143,7 @@ const client = new SecureChatCompletion({ baseUrl: 'https://...' });
 # .env file (never commit to git)
 NOMYO_API_KEY=your-api-key
 NOMYO_KEY_PASSWORD=your-key-password
-NOMYO_SERVER_URL=https://api.nomyo.ai:12434
+NOMYO_SERVER_URL=https://api.nomyo.ai
 ```

 ---
--- a/examples/node/basic.js
+++ b/examples/node/basic.js
@ -7,7 +7,7 @@ import { SecureChatCompletion } from 'nomyo-js';
 async function main() {
    // Initialize client
    const client = new SecureChatCompletion({
-        baseUrl: 'https://api.nomyo.ai:12434',
+        baseUrl: 'https://api.nomyo.ai',
        // For local development, use:
        // baseUrl: 'http://localhost:12434',
        // allowHttp: true
--- a/examples/node/with-tools.js
+++ b/examples/node/with-tools.js
@ -6,7 +6,7 @@ import { SecureChatCompletion } from 'nomyo-js';

 async function main() {
    const client = new SecureChatCompletion({
-        baseUrl: 'https://api.nomyo.ai:12434'
+        baseUrl: 'https://api.nomyo.ai'
    });

    try {
--- a/src/api/SecureChatCompletion.ts
+++ b/src/api/SecureChatCompletion.ts
@ -15,7 +15,7 @@ export class SecureChatCompletion {

    constructor(config: ChatCompletionConfig = {}) {
        const {
-            baseUrl = 'https://api.nomyo.ai:12435',
+            baseUrl = 'https://api.nomyo.ai',
            allowHttp = false,
            apiKey,
            secureMemory = true,
--- a/src/core/SecureCompletionClient.ts
+++ b/src/core/SecureCompletionClient.ts
@ -79,9 +79,9 @@ export class SecureCompletionClient {
    // Promise-based mutex: serialises concurrent ensureKeys() calls
    private ensureKeysLock: Promise<void> = Promise.resolve();

-    constructor(config: ClientConfig = { routerUrl: 'https://api.nomyo.ai:12435' }) {
+    constructor(config: ClientConfig = { routerUrl: 'https://api.nomyo.ai' }) {
        const {
-            routerUrl = 'https://api.nomyo.ai:12435',
+            routerUrl = 'https://api.nomyo.ai',
            allowHttp = false,
            secureMemory = true,
            keySize = 4096,
--- a/src/types/client.ts
+++ b/src/types/client.ts
@ -3,7 +3,7 @@
 */

 export interface ClientConfig {
-    /** Base URL of the NOMYO router (e.g., https://api.nomyo.ai:12434) */
+    /** Base URL of the NOMYO router (e.g., https://api.nomyo.ai) */
    routerUrl: string;

    /** Allow HTTP connections (ONLY for local development, never in production) */