From 43165f86f2c4a10e11aa7ddb51ec1c667b018348 Mon Sep 17 00:00:00 2001 From: alpha nerd Date: Thu, 16 Apr 2026 16:44:26 +0200 Subject: [PATCH] fix: base_url doc: created --- CONTRIBUTING.md | 11 + README.md | 524 +++++++++++++---------------- doc/README.md | 49 +++ doc/api-reference.md | 272 +++++++++++++++ doc/examples.md | 437 ++++++++++++++++++++++++ doc/getting-started.md | 279 +++++++++++++++ doc/installation.md | 107 ++++++ doc/models.md | 85 +++++ doc/rate-limits.md | 115 +++++++ doc/security-guide.md | 237 +++++++++++++ doc/troubleshooting.md | 314 +++++++++++++++++ docs/SECURITY.md | 2 +- examples/node/basic.js | 2 +- examples/node/with-tools.js | 2 +- src/api/SecureChatCompletion.ts | 2 +- src/core/SecureCompletionClient.ts | 4 +- src/types/client.ts | 2 +- 17 files changed, 2151 insertions(+), 293 deletions(-) create mode 100644 CONTRIBUTING.md create mode 100644 doc/README.md create mode 100644 doc/api-reference.md create mode 100644 doc/examples.md create mode 100644 doc/getting-started.md create mode 100644 doc/installation.md create mode 100644 doc/models.md create mode 100644 doc/rate-limits.md create mode 100644 doc/security-guide.md create mode 100644 doc/troubleshooting.md diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md new file mode 100644 index 0000000..24b1f8f --- /dev/null +++ b/CONTRIBUTING.md @@ -0,0 +1,11 @@ +# Contributing + +## Development Setup + +```bash +npm install +npm run build +npm test +``` + +Node.js 18 LTS or later is required for tests and the TypeScript compiler. 
diff --git a/README.md b/README.md index 2749bf8..e48516f 100644 --- a/README.md +++ b/README.md @@ -1,13 +1,13 @@ -# NOMYO.js - Secure JavaScript Chat Client +# NOMYO.js โ€” Secure JavaScript Chat Client **OpenAI-compatible secure chat client with end-to-end encryption for NOMYO Inference Endpoints** -๐Ÿ”’ **All prompts and responses are automatically encrypted and decrypted** -๐Ÿ”‘ **Uses hybrid encryption (AES-256-GCM + RSA-OAEP with 4096-bit keys)** -๐Ÿ”„ **Drop-in replacement for OpenAI's ChatCompletion API** -๐ŸŒ **Works in both Node.js and browsers** +- All prompts and responses are automatically encrypted and decrypted +- Hybrid encryption: AES-256-GCM payload + RSA-OAEP-SHA256 key exchange, 4096-bit keys +- Drop-in replacement for OpenAI's ChatCompletion API +- Works in both Node.js and browsers -## ๐Ÿš€ Quick Start +## Quick Start ### Installation @@ -20,371 +20,323 @@ npm install nomyo-js ```javascript import { SecureChatCompletion } from 'nomyo-js'; -// Initialize client (defaults to https://api.nomyo.ai:12434) const client = new SecureChatCompletion({ - baseUrl: 'https://api.nomyo.ai:12434' + apiKey: process.env.NOMYO_API_KEY, }); -// Simple chat completion const response = await client.create({ model: 'Qwen/Qwen3-0.6B', - messages: [ - { role: 'user', content: 'Hello! How are you today?' } - ], - temperature: 0.7 + messages: [{ role: 'user', content: 'Hello!' }], + temperature: 0.7, }); console.log(response.choices[0].message.content); +client.dispose(); ``` ### Basic Usage (Browser) ```html - - - - - - -

NOMYO Secure Chat

- - + console.log(response.choices[0].message.content); + ``` -## ๐Ÿ” Security Features +## Documentation + +Full documentation is in the [`doc/`](doc/) directory: + +- [Getting Started](doc/getting-started.md) โ€” walkthrough for new users +- [API Reference](doc/api-reference.md) โ€” complete constructor options, methods, types, and error classes +- [Models](doc/models.md) โ€” available models and selection guide +- [Security Guide](doc/security-guide.md) โ€” encryption, memory protection, key management, compliance +- [Rate Limits](doc/rate-limits.md) โ€” limits, automatic retry behaviour, batch throttling +- [Examples](doc/examples.md) โ€” 12+ code examples for common scenarios +- [Troubleshooting](doc/troubleshooting.md) โ€” error reference and debugging tips + +## Security Features ### Hybrid Encryption --**Payload encryption**: AES-256-GCM (authenticated encryption) -- **Key exchange**: RSA-OAEP with SHA-256 +- **Payload encryption**: AES-256-GCM (authenticated encryption) +- **Key exchange**: RSA-OAEP-SHA256 - **Key size**: 4096-bit RSA keys -- **All communication**: End-to-end encrypted +- **Scope**: All communication is end-to-end encrypted ### Key Management -- **Automatic key generation**: Keys are automatically generated on first use -- **Automatic key loading**: Existing keys are loaded automatically from `client_keys/` directory (Node.js only) -- **No manual intervention required**: The library handles key management automatically -- **Optional persistence**: Keys can be saved to `client_keys/` directory for reuse across sessions (Node.js only) -- **Password protection**: Optional password encryption for private keys (minimum 8 characters required) -- **Secure permissions**: Private keys stored with restricted permissions (600 - owner-only access) -- **Automatic key rotation**: Keys are rotated on a configurable interval (default: 24 hours) to limit fingerprint lifetime -- **Explicit lifecycle management**: Call `dispose()` to immediately zero 
in-memory key material and stop the rotation timer +- **Automatic**: Keys are generated on first use and saved to `keyDir` (default: `client_keys/`). Existing keys are reloaded on subsequent runs. Node.js only. +- **Password protection**: Optional AES-encrypted private key files (minimum 8 characters). +- **Secure permissions**: Private key files saved at `0600` (owner-only). +- **Auto-rotation**: Keys rotate every 24 hours by default (configurable via `keyRotationInterval`). +- **Explicit lifecycle**: Call `dispose()` to zero in-memory key material and stop the rotation timer. -### Secure Memory Protection +### Secure Memory -> [!NOTE] -> **Pure JavaScript Implementation**: This version uses pure JavaScript with immediate memory zeroing. -> OS-level memory locking (`mlock`) is NOT available without a native addon. -> For enhanced security in production, consider implementing the optional native addon (see `native/` directory). +The library wraps all intermediate sensitive buffers (AES keys, plaintext payload, decrypted bytes) in `SecureByteContext`, which zeroes them in a `finally` block immediately after use. -- **Automatic cleanup**: Sensitive data is zeroed from memory immediately after use -- **Best-effort protection**: Minimizes exposure time of sensitive data -- **Fallback mechanism**: Graceful degradation if enhanced security is unavailable - -## ๐Ÿ”„ OpenAI Compatibility - -The `SecureChatCompletion` class provides **exact API compatibility** with OpenAI's `ChatCompletion.create()` method. 
- -### Supported Parameters - -All standard OpenAI parameters are supported: - -- `model`: Model identifier -- `messages`: List of message objects -- `temperature`: Sampling temperature (0-2) -- `max_tokens`: Maximum tokens to generate -- `top_p`: Nucleus sampling -- `frequency_penalty`: Frequency penalty -- `presence_penalty`: Presence penalty -- `stop`: Stop sequences -- `n`: Number of completions -- `tools`: Tool definitions -- `tool_choice`: Tool selection strategy -- `user`: User identifier - -### Response Format - -Responses follow the OpenAI format exactly, with an additional `_metadata` field for debugging and security information: +Pure JavaScript cannot lock pages to prevent OS swapping (`mlock`). For environments where swap-file exposure is unacceptable, install the optional `nomyo-native` addon. Check the current protection level: ```javascript -{ - "id": "chatcmpl-123", - "object": "chat.completion", - "created": 1234567890, - "model": "Qwen/Qwen3-0.6B", - "choices": [ - { - "index": 0, - "message": { - "role": "assistant", - "content": "Hello! I'm doing well, thank you for asking." 
- }, - "finish_reason": "stop" - } - ], - "usage": { - "prompt_tokens": 10, - "completion_tokens": 20, - "total_tokens": 30 - }, - "_metadata": { - "payload_id": "openai-compat-abc123", - "processed_at": 1765250382, - "is_encrypted": true, - "encryption_algorithm": "hybrid-aes256-rsa4096", - "response_status": "success" - } +import { getMemoryProtectionInfo } from 'nomyo-js'; + +const info = getMemoryProtectionInfo(); +// Without addon: { method: 'zero-only', canLock: false } +// With addon: { method: 'mlock', canLock: true } +``` + +### Security Tiers + +Pass `security_tier` per request to route inference to increasingly isolated hardware: + +| Tier | Hardware | Use case | +|------|----------|----------| +| `"standard"` | GPU | General secure inference | +| `"high"` | CPU/GPU balanced | Sensitive business data | +| `"maximum"` | CPU only | HIPAA PHI, classified data | + +```javascript +const response = await client.create({ + model: 'Qwen/Qwen3-0.6B', + messages: [{ role: 'user', content: 'Patient record summary...' }], + security_tier: 'maximum', +}); +``` + +## Usage Examples + +### With API Key + +```javascript +const client = new SecureChatCompletion({ apiKey: process.env.NOMYO_API_KEY }); +``` + +### Error Handling + +```javascript +import { + SecureChatCompletion, + AuthenticationError, + RateLimitError, + ForbiddenError, +} from 'nomyo-js'; + +try { + const response = await client.create({ model: 'Qwen/Qwen3-0.6B', messages: [...] 
}); +} catch (err) { + if (err instanceof AuthenticationError) console.error('Check API key:', err.message); + else if (err instanceof RateLimitError) console.error('Rate limit hit:', err.message); + else if (err instanceof ForbiddenError) console.error('Model/tier mismatch:', err.message); + else throw err; } ``` -## 🛠️ Usage Examples +### Per-Request Router Override -### Basic Chat +Send a single request to a different router without changing the main client: ```javascript -import { SecureChatCompletion } from 'nomyo-js'; - -const client = new SecureChatCompletion({ - baseUrl: 'https://api.nomyo.ai:12434' -}); - const response = await client.create({ - model: 'Qwen/Qwen3-0.6B', - messages: [ - { role: 'system', content: 'You are a helpful assistant.' }, - { role: 'user', content: 'What is the capital of France?' } - ], - temperature: 0.7 + model: 'Qwen/Qwen3-0.6B', + messages: [{ role: 'user', content: 'Hello from secondary router' }], + base_url: 'https://secondary.nomyo.ai:12435', // temporary — main client unchanged }); - -console.log(response.choices[0].message.content); ``` -### With Tools +### Tool / Function Calling ```javascript const response = await client.create({ - model: 'Qwen/Qwen3-0.6B', - messages: [ - { role: 'user', content: "What's the weather in Paris?"
}], tools: [ { type: 'function', function: { - name: 'get_weather', - description: 'Get weather information', + name: 'get_weather', + description: 'Get weather information for a location', parameters: { type: 'object', - properties: { - location: { type: 'string' } - }, - required: ['location'] - } - } - } - ] + properties: { location: { type: 'string' } }, + required: ['location'], + }, + }, + }, + ], + tool_choice: 'auto', }); ``` -### With API Key Authentication +### Thinking Models ```javascript -const client = new SecureChatCompletion({ - baseUrl: 'https://api.nomyo.ai:12434', - apiKey: 'your-api-key-here' -}); - -// API key will be automatically included in all requests const response = await client.create({ - model: 'Qwen/Qwen3-0.6B', - messages: [ - { role: 'user', content: 'Hello!' } - ] -}); -``` - -### Custom Key Management (Node.js) - -```javascript -import { SecureCompletionClient } from 'nomyo-js'; - -const client = new SecureCompletionClient({ - routerUrl: 'https://api.nomyo.ai:12434' + model: 'LiquidAI/LFM2.5-1.2B-Thinking', + messages: [{ role: 'user', content: 'Is 9.9 larger than 9.11?' 
}], }); -// Generate keys with password protection -await client.generateKeys({ - saveToFile: true, - keyDir: 'client_keys', - password: 'your-secure-password' -}); - -// Or load existing keys -await client.loadKeys( - 'client_keys/private_key.pem', - 'client_keys/public_key.pem', - 'your-secure-password' -); +const { content, reasoning_content } = response.choices[0].message; +console.log('Reasoning:', reasoning_content); +console.log('Answer:', content); ``` ### Resource Management -Always call `dispose()` when finished to zero key material and stop the background rotation timer: - ```javascript -const client = new SecureChatCompletion({ - baseUrl: 'https://api.nomyo.ai:12434', - keyRotationInterval: 3600000, // rotate every hour -}); +const client = new SecureChatCompletion({ apiKey: process.env.NOMYO_API_KEY }); try { const response = await client.create({ model: 'Qwen/Qwen3-0.6B', messages: [...] }); console.log(response.choices[0].message.content); } finally { - client.dispose(); + client.dispose(); // zeros key material, stops rotation timer } ``` -To disable key rotation entirely (e.g. 
short-lived scripts): - -```javascript -const client = new SecureChatCompletion({ - baseUrl: 'https://api.nomyo.ai:12434', - keyRotationInterval: 0, // disabled -}); -``` - -## ๐Ÿงช Platform Support - -### Node.js - -- **Minimum version**: Node.js 14.17+ -- **Recommended**: Node.js 18 LTS or later -- **Key storage**: File system (`client_keys/` directory) -- **Security**: Full implementation with automatic key persistence - -### Browsers - -- **Supported browsers**: Modern browsers with Web Crypto API support - - Chrome 37+ - - Firefox 34+ - - Safari 11+ - - Edge 79+ -- **Key storage**: In-memory only (keys not persisted for security) -- **Security**: Best-effort memory protection (no OS-level locking) - -## ๐Ÿ“š API Reference - -### SecureChatCompletion - -#### Constructor - -```typescript -new SecureChatCompletion(config?: { - baseUrl?: string; // Default: 'https://api.nomyo.ai:12434' - allowHttp?: boolean; // Default: false - apiKey?: string; // Default: undefined - secureMemory?: boolean; // Default: true - timeout?: number; // Request timeout ms. Default: 60000 - debug?: boolean; // Enable verbose logging. Default: false - keyRotationInterval?: number; // Key rotation ms. 0 = disabled. Default: 86400000 (24h) - keyRotationDir?: string; // Directory for rotated keys. Default: 'client_keys' - keyRotationPassword?: string; // Password for rotated key files -}) -``` - -#### Methods - -- `create(request: ChatCompletionRequest): Promise` -- `acreate(request: ChatCompletionRequest): Promise` (alias) -- `dispose(): void` โ€” zero key material and stop rotation timer - -### SecureCompletionClient - -Lower-level API for advanced use cases. - -#### Constructor - -```typescript -new SecureCompletionClient(config?: { - routerUrl?: string; // Default: 'https://api.nomyo.ai:12434' - allowHttp?: boolean; // Default: false - secureMemory?: boolean; // Default: true - keySize?: 2048 | 4096; // Default: 4096 - timeout?: number; // Request timeout ms. 
Default: 60000 - debug?: boolean; // Enable verbose logging. Default: false - keyRotationInterval?: number; // Key rotation ms. 0 = disabled. Default: 86400000 (24h) - keyRotationDir?: string; // Directory for rotated keys. Default: 'client_keys' - keyRotationPassword?: string; // Password for rotated key files -}) -``` - -#### Methods - -- `generateKeys(options?: KeyGenOptions): Promise` -- `loadKeys(privateKeyPath: string, publicKeyPath?: string, password?: string): Promise` -- `fetchServerPublicKey(): Promise` -- `encryptPayload(payload: object): Promise` -- `decryptResponse(encrypted: ArrayBuffer, payloadId: string): Promise` -- `sendSecureRequest(payload: object, payloadId: string, apiKey?: string): Promise` -- `dispose(): void` โ€” zero key material and stop rotation timer - -## ๐Ÿ”ง Configuration - ### Local Development (HTTP) ```javascript const client = new SecureChatCompletion({ - baseUrl: 'http://localhost:12434', - allowHttp: true // Required for HTTP connections + baseUrl: 'http://localhost:12435', + allowHttp: true, // required โ€” also prints a visible warning }); ``` -โš ๏ธ **Warning**: Only use HTTP for local development. Never use in production! +## API Reference -### Disable Secure Memory +### `SecureChatCompletion` โ€” Constructor Options -```javascript -const client = new SecureChatCompletion({ - baseUrl: 'https://api.nomyo.ai:12434', - secureMemory: false // Disable memory protection (not recommended) -}); +```typescript +new SecureChatCompletion(config?: ChatCompletionConfig) ``` -## ๐Ÿ“ Security Best Practices +| Option | Type | Default | Description | +|--------|------|---------|-------------| +| `baseUrl` | `string` | `'https://api.nomyo.ai'` | NOMYO router URL. Must be HTTPS in production. | +| `allowHttp` | `boolean` | `false` | Allow HTTP connections. Local development only. | +| `apiKey` | `string` | `undefined` | Bearer token for `Authorization` header. 
| +| `secureMemory` | `boolean` | `true` | Zero sensitive buffers immediately after use. | +| `timeout` | `number` | `60000` | Request timeout in milliseconds. | +| `debug` | `boolean` | `false` | Print verbose logging to the console. | +| `keyDir` | `string` | `'client_keys'` | Directory to load/save RSA keys on startup. | +| `keyRotationInterval` | `number` | `86400000` | Auto-rotate keys every N ms. `0` disables rotation. | +| `keyRotationDir` | `string` | `'client_keys'` | Directory for rotated key files. Node.js only. | +| `keyRotationPassword` | `string` | `undefined` | Password for encrypted rotated key files. | +| `maxRetries` | `number` | `2` | Extra retry attempts on 429/5xx/network errors. Exponential backoff (1 s, 2 s, โ€ฆ). | -- โœ… Always use HTTPS in production -- โœ… Use password protection for private keys (Node.js) -- โœ… Keep private keys secure (permissions set to 600) -- โœ… Never share your private key -- โœ… Verify server's public key fingerprint before first use -- โœ… Enable secure memory protection (default) +#### Methods -## ๐Ÿค Contributing +- `create(request): Promise` โ€” send an encrypted chat completion +- `acreate(request): Promise` โ€” alias for `create()` +- `dispose(): void` โ€” zero key material and stop the rotation timer -Contributions are welcome! Please open issues or pull requests on the project repository. +#### `create()` Request Fields -## ๐Ÿ“„ License +All standard OpenAI fields (`model`, `messages`, `temperature`, `top_p`, `max_tokens`, `stop`, `n`, `tools`, `tool_choice`, `user`, `frequency_penalty`, `presence_penalty`, `logit_bias`) plus: -See LICENSE file for licensing information. 
+| Field | Description | +|-------|-------------| +| `security_tier` | `"standard"` \| `"high"` \| `"maximum"` โ€” hardware isolation level | +| `api_key` | Per-request API key override | +| `base_url` | Per-request router URL override โ€” creates a temporary client, used once, then disposed | -## ๐Ÿ“ž Support +### `SecureCompletionClient` โ€” Constructor Options -For questions or issues, please refer to the project documentation or open an issue. +Lower-level client. All options above apply, with these differences: + +| Option | Type | Default | Description | +|--------|------|---------|-------------| +| `routerUrl` | `string` | `'https://api.nomyo.ai'` | Base URL (`baseUrl` is renamed here) | +| `keySize` | `2048 \| 4096` | `4096` | RSA modulus length | + +#### Methods + +- `generateKeys(options?)` โ€” generate a new RSA key pair +- `loadKeys(privateKeyPath, publicKeyPath?, password?)` โ€” load existing PEM files +- `fetchServerPublicKey()` โ€” fetch the server's RSA public key +- `encryptPayload(payload)` โ€” encrypt a request payload +- `decryptResponse(encrypted, payloadId)` โ€” decrypt a response body +- `sendSecureRequest(payload, payloadId, apiKey?, securityTier?)` โ€” full encrypt โ†’ POST โ†’ decrypt cycle +- `dispose()` โ€” zero key material and stop rotation timer + +### Secure Memory Public API + +```typescript +import { + getMemoryProtectionInfo, + disableSecureMemory, + enableSecureMemory, + SecureByteContext, +} from 'nomyo-js'; +``` + +| Export | Description | +|--------|-------------| +| `getMemoryProtectionInfo()` | Returns `{ method, canLock, isPlatformSecure, details? 
}` | +| `disableSecureMemory()` | Disable global secure-memory zeroing | +| `enableSecureMemory()` | Re-enable global secure-memory zeroing | +| `SecureByteContext` | Low-level buffer wrapper โ€” zeros in `finally` block | + +### Error Classes + +```typescript +import { + AuthenticationError, InvalidRequestError, RateLimitError, + ForbiddenError, ServerError, ServiceUnavailableError, + APIConnectionError, SecurityError, DisposedError, APIError, +} from 'nomyo-js'; +``` + +| Class | HTTP | Thrown when | +|-------|------|-------------| +| `AuthenticationError` | 401 | Invalid or missing API key | +| `InvalidRequestError` | 400 | Malformed request | +| `ForbiddenError` | 403 | Model not allowed for the security tier | +| `RateLimitError` | 429 | Rate limit exceeded (after all retries) | +| `ServerError` | 500 | Internal server error (after all retries) | +| `ServiceUnavailableError` | 503 | Backend unavailable (after all retries) | +| `APIError` | varies | Other HTTP errors | +| `APIConnectionError` | โ€” | Network failure or timeout (after all retries) | +| `SecurityError` | โ€” | HTTPS not used, header injection, or crypto failure | +| `DisposedError` | โ€” | Method called after `dispose()` | + +## Platform Support + +### Node.js + +- **Minimum**: Node.js 14.17+ +- **Recommended**: Node.js 18 LTS or later +- **Key storage**: File system (`keyDir` directory, default `client_keys/`) + +### Browsers + +- **Supported**: Chrome 37+, Firefox 34+, Safari 11+, Edge 79+ +- **Key storage**: In-memory only (not persisted) +- **Limitation**: File-based key operations (`keyDir`, `loadKeys`) are not available + +## Security Best Practices + +- Always use HTTPS (`allowHttp` is `false` by default) +- Load API key from an environment variable, never hardcode it +- Use password-protected key files (`keyRotationPassword`) +- Store keys outside the project directory and outside version control +- Add `client_keys/` and `*.pem` to `.gitignore` +- Call `dispose()` when the client is no 
longer needed +- Use `security_tier: 'maximum'` for HIPAA PHI or classified data +- Consider the `nomyo-native` addon if swap-file exposure is unacceptable + +## License + +See LICENSE file. diff --git a/doc/README.md b/doc/README.md new file mode 100644 index 0000000..c98188d --- /dev/null +++ b/doc/README.md @@ -0,0 +1,49 @@ +# NOMYO.js Documentation + +Comprehensive documentation for the NOMYO secure JavaScript/TypeScript chat client โ€” a drop-in replacement for OpenAI's `ChatCompletion` API with end-to-end encryption. + +To use this library you need an active subscription on [NOMYO Inference](https://chat.nomyo.ai/). + +## Quick Start + +```javascript +import { SecureChatCompletion } from 'nomyo-js'; + +const client = new SecureChatCompletion({ apiKey: process.env.NOMYO_API_KEY }); + +const response = await client.create({ + model: 'Qwen/Qwen3-0.6B', + messages: [{ role: 'user', content: 'Hello!' }], + security_tier: 'standard', +}); + +console.log(response.choices[0].message.content); +``` + +## Documentation + +1. [Installation](installation.md) โ€” npm, CDN, and native addon setup +2. [Getting Started](getting-started.md) โ€” first request, auth, security tiers, error handling +3. [API Reference](api-reference.md) โ€” complete constructor options, methods, and types +4. [Models](models.md) โ€” available models and selection guidance +5. [Security Guide](security-guide.md) โ€” encryption architecture, best practices, and compliance +6. [Rate Limits](rate-limits.md) โ€” request limits, burst behaviour, and retry strategy +7. [Examples](examples.md) โ€” real-world scenarios, browser usage, and advanced patterns +8. [Troubleshooting](troubleshooting.md) โ€” common errors and their fixes + +--- + +## Key Features + +- **End-to-end encryption** โ€” AES-256-GCM + RSA-OAEP-4096. No plaintext ever leaves your process. +- **OpenAI-compatible API** โ€” `create()` / `acreate()` accept the same parameters as the OpenAI SDK. 
+- **Browser + Node.js** — single package, separate entry points for each runtime. +- **Automatic key management** — keys are generated on first use and optionally persisted to disk (Node.js). +- **Automatic key rotation** — RSA keys rotate on a configurable interval (default 24 h) to limit fingerprint lifetime. +- **Security tiers** — per-request routing to `standard`, `high`, or `maximum` isolation hardware. +- **Retry with exponential backoff** — automatic retries on 429 / 5xx / network errors (configurable). +- **Resource lifecycle** — `dispose()` immediately zeros in-memory key material and stops the rotation timer. + +## Technical Security Docs + +For cryptographic architecture, threat model, and implementation status see [SECURITY.md](../docs/SECURITY.md). diff --git a/doc/api-reference.md b/doc/api-reference.md new file mode 100644 index 0000000..d49e702 --- /dev/null +++ b/doc/api-reference.md @@ -0,0 +1,272 @@ +# API Reference + +## `SecureChatCompletion` + +High-level OpenAI-compatible client. The recommended entry point for most use cases. + +### Constructor + +```typescript +new SecureChatCompletion(config?: ChatCompletionConfig) +``` + +#### `ChatCompletionConfig` + + +| Option | Type | Default | Description | +| ----------------------- | ----------- | -------------------------- | --------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | +| `baseUrl` | `string` | `'https://api.nomyo.ai'` | NOMYO router URL. Must be HTTPS in production. | +| `allowHttp` | `boolean` | `false` | Allow HTTP connections. **Local development only.** | +| `apiKey` | `string` | `undefined` | Bearer token sent in `Authorization` header. | +| `secureMemory` | `boolean` | `true` | Enable immediate zeroing of sensitive buffers after use. | +| `timeout` | `number` | `60000` | Request timeout in milliseconds.
| +| `debug` | `boolean` | `false` | Print verbose logging to the console. | +| `keyDir` | `string` | `'client_keys'` | Directory to load/save RSA keys on startup. If the directory contains an existing key pair it is loaded; otherwise a new pair is generated and saved there. Node.js only. | +| `keyRotationInterval` | `number` | `86400000` (24 h) | Auto-rotate RSA keys every N milliseconds. Set to `0` to disable. | +| `keyRotationDir` | `string` | `'client_keys'` | Directory where rotated key files are saved. Node.js only. | +| `keyRotationPassword` | `string` | `undefined` | Password used to encrypt rotated key files. | +| `maxRetries` | `number` | `2` | Maximum extra attempts on retryable errors (429, 500, 502, 503, 504, network errors). Uses exponential backoff (1 s, 2 s, …). Set to `0` to disable retries. | + +### Methods + +#### `create(request): Promise<ChatCompletionResponse>` + +Send an encrypted chat completion request. Returns the decrypted response. + +```typescript +async create(request: ChatCompletionRequest): Promise<ChatCompletionResponse> +``` + +**`ChatCompletionRequest` fields:** + + +| Field | Type | Description | +| --------------------- | -------------------------- | -------------------------------------------------------------------------------------------------------------------------------- | +| `model` | `string` | **Required.** Model ID (see [Models](models.md)). | +| `messages` | `Message[]` | **Required.** Conversation history. | +| `temperature` | `number` | Sampling temperature (0–2). | +| `top_p` | `number` | Nucleus sampling. | +| `max_tokens` | `number` | Maximum tokens to generate. | +| `stop` | `string \| string[]` | Stop sequences. | +| `n` | `number` | Number of completions to generate. | +| `stream` | `boolean` | Ignored server-side (encryption requires full response). | +| `presence_penalty` | `number` | Presence penalty (−2.0–2.0). | +| `frequency_penalty` | `number` | Frequency penalty (−2.0–2.0). | +| `logit_bias` | `Record<string, number>` | Token bias map.
| +| `user` | `string` | End-user identifier (passed through). | +| `tools` | `Tool[]` | Tool/function definitions. | +| `tool_choice` | `ToolChoice` | Tool selection strategy (`"auto"`, `"none"`, `"required"`, or specific tool). | +| `security_tier` | `string` | NOMYO-specific.`"standard"` \| `"high"` \| `"maximum"`. Not encrypted into the payload. | +| `api_key` | `string` | NOMYO-specific. Per-request API key override. Not encrypted into the payload. | +| `base_url` | `string` | NOMYO-specific. Per-request router URL override. Creates a temporary client for this one call. Not encrypted into the payload. | + +**Response shape (`ChatCompletionResponse`):** + +```typescript +{ + id: string; + object: 'chat.completion'; + created: number; + model: string; + choices: Array<{ + index: number; + message: { + role: string; + content: string; + tool_calls?: ToolCall[]; // present if tools were invoked + reasoning_content?: string; // chain-of-thought (Qwen3, DeepSeek-R1, etc.) + }; + finish_reason: 'stop' | 'length' | 'tool_calls' | 'content_filter' | null; + }>; + usage?: { + prompt_tokens: number; + completion_tokens: number; + total_tokens: number; + }; + _metadata?: { + payload_id: string; // echoes the X-Payload-ID sent with the request + processed_at: number; // Unix timestamp of server-side processing + is_encrypted: boolean; // always true for this endpoint + encryption_algorithm: string; // e.g. "hybrid-aes256-rsa4096" + response_status: string; // "success" on success + security_tier?: string; // active tier used by the server + memory_protection?: { + platform: string; + memory_locking: boolean; + secure_zeroing: boolean; + core_dump_prevention: boolean; + }; + cuda_device?: { + available: boolean; + device_hash: string; // SHA-256 of device name (not the raw name) + }; + }; +} +``` + +#### `acreate(request): Promise` + +Alias for `create()`. Provided for code that follows the OpenAI SDK naming convention. 
+ +#### `dispose(): void` + +Stop the key-rotation timer and sever in-memory RSA key references so they can be garbage-collected. After calling `dispose()`, all methods throw `DisposedError`. + +```javascript +client.dispose(); +``` + +--- + +## `SecureCompletionClient` + +Lower-level client that exposes key management and individual encryption/decryption operations. +Use this when you need fine-grained control; for most use cases prefer `SecureChatCompletion`. + +### Constructor + +```typescript +new SecureCompletionClient(config?: ClientConfig) +``` + +#### `ClientConfig` + +All options from `ChatCompletionConfig`, plus: + + +| Option | Type | Default | Description | +| ------------- | --------------- | -------------------------------- | --------------------------------------------------------------- | +| `routerUrl` | `string` | `'https://api.nomyo.ai'` | NOMYO router base URL. | +| `keySize` | `2048 \| 4096` | `4096` | RSA modulus length. 2048 is accepted but 4096 is recommended. | + +(`baseUrl` is renamed to `routerUrl` at this level; all other options are identical.) + +### Methods + +#### `generateKeys(options?): Promise` + +Generate a fresh RSA key pair. + +```typescript +await client.generateKeys({ + keySize?: 2048 | 4096, // default: 4096 + saveToFile?: boolean, // default: false + keyDir?: string, // default: 'client_keys' + password?: string, // minimum 8 characters if provided +}); +``` + +#### `loadKeys(privateKeyPath, publicKeyPath?, password?): Promise` + +Load an existing key pair from PEM files. Node.js only. + +```typescript +await client.loadKeys( + 'client_keys/private_key.pem', + 'client_keys/public_key.pem', // optional; derived from private key path if omitted + 'your-password' // required if private key is encrypted +); +``` + +#### `fetchServerPublicKey(): Promise` + +Fetch the server's RSA public key from `/pki/public_key` over HTTPS. Called automatically on every encryption; exposed for diagnostics.
+ +#### `encryptPayload(payload): Promise` + +Encrypt a request payload. Returns the encrypted binary package ready to POST. + +#### `decryptResponse(encrypted, payloadId): Promise` + +Decrypt a response body received from the secure endpoint. + +#### `sendSecureRequest(payload, payloadId, apiKey?, securityTier?): Promise` + +Full encrypt โ†’ POST โ†’ decrypt cycle with retry logic. Called internally by `SecureChatCompletion.create()`. + +#### `dispose(): void` + +Same as `SecureChatCompletion.dispose()`. + +--- + +## Secure Memory API + +```typescript +import { + getMemoryProtectionInfo, + disableSecureMemory, + enableSecureMemory, + SecureByteContext, +} from 'nomyo-js'; +``` + +### `getMemoryProtectionInfo(): ProtectionInfo` + +Returns information about the memory protection available on the current platform: + +```typescript +interface ProtectionInfo { + canLock: boolean; // true if mlock is available (requires native addon) + isPlatformSecure: boolean; + method: 'mlock' | 'zero-only' | 'none'; + details?: string; +} +``` + +### `disableSecureMemory(): void` + +Disable secure-memory zeroing globally. Affects new `SecureByteContext` instances that do not pass an explicit `useSecure` argument. Existing client instances are unaffected (they pass `useSecure` explicitly). + +### `enableSecureMemory(): void` + +Re-enable secure memory operations globally. + +### `SecureByteContext` + +Low-level context manager that zeros an `ArrayBuffer` in a `finally` block even if an exception occurs. Analogous to Python's `secure_bytearray()` context manager. + +```typescript +const context = new SecureByteContext(sensitiveBuffer); +const result = await context.use(async (data) => { + return doSomethingWith(data); +}); +// sensitiveBuffer is zeroed here regardless of whether doSomethingWith threw +``` + +--- + +## Error Classes + +All errors are exported from the package root. 
+ +```typescript +import { + APIError, + AuthenticationError, + InvalidRequestError, + RateLimitError, + ForbiddenError, + ServerError, + ServiceUnavailableError, + APIConnectionError, + SecurityError, + DisposedError, +} from 'nomyo-js'; +``` + + +| Class | HTTP status | Thrown when | +| --------------------------- | ------------- | -------------------------------------------------------------- | +| `AuthenticationError` | 401 | Invalid or missing API key | +| `InvalidRequestError` | 400 | Malformed request (e.g. streaming requested) | +| `ForbiddenError` | 403 | Model not allowed for the requested security tier | +| `RateLimitError` | 429 | Rate limit exceeded (after all retries exhausted) | +| `ServerError` | 500 | Internal server error (after all retries exhausted) | +| `ServiceUnavailableError` | 503 | Inference backend unavailable (after all retries exhausted) | +| `APIError` | varies | Other HTTP errors (404, 502, 504, etc.) | +| `APIConnectionError` | — | Network failure or timeout (after all retries exhausted) | +| `SecurityError` | — | HTTPS not used, header injection detected, or crypto failure | +| `DisposedError` | — | Method called after `dispose()` | + +All errors that extend `APIError` expose `statusCode?: number` and `errorDetails?: object`. diff --git a/doc/examples.md b/doc/examples.md new file mode 100644 index 0000000..4a57179 --- /dev/null +++ b/doc/examples.md @@ -0,0 +1,437 @@ +# Examples + +## Basic Usage + +### Simple Chat + +```javascript +import { SecureChatCompletion } from 'nomyo-js'; + +const client = new SecureChatCompletion({ apiKey: process.env.NOMYO_API_KEY }); + +const response = await client.create({ + model: 'Qwen/Qwen3-0.6B', + messages: [{ role: 'user', content: 'Hello, how are you?'
}], + temperature: 0.7, +}); + +console.log(response.choices[0].message.content); +client.dispose(); +``` + +### Chat with System Message + +```javascript +const response = await client.create({ + model: 'Qwen/Qwen3-0.6B', + messages: [ + { role: 'system', content: 'You are a concise technical assistant.' }, + { role: 'user', content: 'What is the capital of France?' }, + ], + temperature: 0.7, +}); + +console.log(response.choices[0].message.content); +``` + +--- + +## Security Tiers + +```javascript +// Standard โ€” general use (GPU) +const r1 = await client.create({ + model: 'Qwen/Qwen3-0.6B', + messages: [{ role: 'user', content: 'General query' }], + security_tier: 'standard', +}); + +// High โ€” sensitive business data +const r2 = await client.create({ + model: 'Qwen/Qwen3-0.6B', + messages: [{ role: 'user', content: 'Review this contract clause...' }], + security_tier: 'high', +}); + +// Maximum โ€” HIPAA PHI / classified data (CPU-only) +const r3 = await client.create({ + model: 'Qwen/Qwen3-0.6B', + messages: [{ role: 'user', content: 'Patient record summary...' }], + security_tier: 'maximum', +}); +``` + +--- + +## Tool / Function Calling + +```javascript +const response = await client.create({ + model: 'Qwen/Qwen3-0.6B', + messages: [{ role: 'user', content: "What's the weather in Paris?" 
}], + tools: [ + { + type: 'function', + function: { + name: 'get_weather', + description: 'Get weather information for a location', + parameters: { + type: 'object', + properties: { + location: { type: 'string', description: 'City name' }, + }, + required: ['location'], + }, + }, + }, + ], + tool_choice: 'auto', +}); + +const message = response.choices[0].message; +if (message.tool_calls?.length) { + const call = message.tool_calls[0]; + const args = JSON.parse(call.function.arguments); + console.log(`Call ${call.function.name}(location="${args.location}")`); + // โ†’ Call get_weather(location="Paris") +} +``` + +--- + +## Error Handling + +```javascript +import { + SecureChatCompletion, + AuthenticationError, + RateLimitError, + ForbiddenError, + InvalidRequestError, + ServerError, + ServiceUnavailableError, + APIConnectionError, + SecurityError, +} from 'nomyo-js'; + +const client = new SecureChatCompletion({ apiKey: process.env.NOMYO_API_KEY }); + +try { + const response = await client.create({ + model: 'Qwen/Qwen3-0.6B', + messages: [{ role: 'user', content: 'Hello' }], + }); + console.log(response.choices[0].message.content); + +} catch (err) { + if (err instanceof AuthenticationError) { + console.error('Check your API key:', err.message); + } else if (err instanceof RateLimitError) { + console.error('Rate limit hit after all retries:', err.message); + } else if (err instanceof ForbiddenError) { + console.error('Model not allowed for this security tier:', err.message); + } else if (err instanceof InvalidRequestError) { + console.error('Bad request:', err.message, err.errorDetails); + } else if (err instanceof ServerError || err instanceof ServiceUnavailableError) { + console.error('Server error after retries:', err.message); + } else if (err instanceof APIConnectionError) { + console.error('Network error after retries:', err.message); + } else if (err instanceof SecurityError) { + console.error('Security/crypto failure:', err.message); + } else { + throw err; 
+ } +} +``` + +--- + +## Real-World Scenarios + +### Chat Application with History + +```javascript +import { SecureChatCompletion } from 'nomyo-js'; + +class SecureChatApp { + constructor(apiKey) { + this.client = new SecureChatCompletion({ apiKey }); + this.history = []; + } + + async chat(userMessage) { + this.history.push({ role: 'user', content: userMessage }); + + const response = await this.client.create({ + model: 'Qwen/Qwen3-0.6B', + messages: this.history, + temperature: 0.7, + }); + + const assistantMessage = response.choices[0].message; + this.history.push({ role: assistantMessage.role, content: assistantMessage.content }); + return assistantMessage.content; + } + + dispose() { + this.client.dispose(); + } +} + +const app = new SecureChatApp(process.env.NOMYO_API_KEY); + +const r1 = await app.chat("What's your name?"); +console.log('Assistant:', r1); + +const r2 = await app.chat('What did I just ask you?'); +console.log('Assistant:', r2); + +app.dispose(); +``` + +### Per-Request Base URL Override + +For multi-tenant setups or testing against different router instances from a single client: + +```javascript +const client = new SecureChatCompletion({ + baseUrl: 'https://primary.nomyo.ai:12435', + apiKey: process.env.NOMYO_API_KEY, +}); + +// This single request goes to a different router; a temporary client is +// created, used, and disposed automatically โ€” the main client is unchanged +const response = await client.create({ + model: 'Qwen/Qwen3-0.6B', + messages: [{ role: 'user', content: 'Hello from secondary router' }], + base_url: 'https://secondary.nomyo.ai:12435', +}); +``` + +### Environment-Based Configuration + +```javascript +import 'dotenv/config'; +import { SecureChatCompletion } from 'nomyo-js'; + +const client = new SecureChatCompletion({ + baseUrl: process.env.NOMYO_SERVER_URL ?? 'https://api.nomyo.ai', + apiKey: process.env.NOMYO_API_KEY, + keyDir: process.env.NOMYO_KEY_DIR ?? 
'client_keys', + maxRetries: Number(process.env.NOMYO_MAX_RETRIES ?? '2'), + debug: process.env.NODE_ENV === 'development', +}); +``` + +--- + +## Batch Processing + +### Sequential (Rate-Limit-Safe) + +```javascript +const queries = [ + 'Summarise document A', + 'Summarise document B', + 'Summarise document C', +]; + +const summaries = []; +for (const query of queries) { + const response = await client.create({ + model: 'Qwen/Qwen3-0.6B', + messages: [{ role: 'user', content: query }], + }); + summaries.push(response.choices[0].message.content); + // Optional: add a small delay to stay within rate limits + await new Promise(r => setTimeout(r, 600)); +} +``` + +### Concurrent (With Throttling) + +```javascript +// Process in batches of 2 (the default rate limit) +async function batchN(items, batchSize, fn) { + const results = []; + for (let i = 0; i < items.length; i += batchSize) { + const batch = items.slice(i, i + batchSize); + const batchResults = await Promise.all(batch.map(fn)); + results.push(...batchResults); + if (i + batchSize < items.length) { + await new Promise(r => setTimeout(r, 1100)); // wait >1 s between batches + } + } + return results; +} + +const summaries = await batchN(documents, 2, async (doc) => { + const response = await client.create({ + model: 'Qwen/Qwen3-0.6B', + messages: [{ role: 'user', content: `Summarise: ${doc}` }], + }); + return response.choices[0].message.content; +}); +``` + +--- + +## Thinking Models + +```javascript +const response = await client.create({ + model: 'LiquidAI/LFM2.5-1.2B-Thinking', + messages: [{ role: 'user', content: 'Is 9.9 larger than 9.11?' }], +}); + +const { content, reasoning_content } = response.choices[0].message; +console.log('Reasoning:', reasoning_content); // internal chain-of-thought +console.log('Answer:', content); // final answer to the user +``` + +--- + +## Browser Usage + +```html + + + + NOMYO Secure Chat + + + + +
+ + + + +``` + +--- + +## Advanced Key Management + +### Custom Key Directory + +```javascript +const client = new SecureChatCompletion({ + apiKey: process.env.NOMYO_API_KEY, + keyDir: '/var/lib/myapp/nomyo-keys', // outside project directory + keyRotationDir: '/var/lib/myapp/nomyo-keys', + keyRotationPassword: process.env.NOMYO_KEY_PASSWORD, +}); +``` + +### Generating Keys Manually + +```javascript +import { SecureCompletionClient } from 'nomyo-js'; + +const client = new SecureCompletionClient({ + routerUrl: 'https://api.nomyo.ai', +}); + +// Generate a new 4096-bit key pair and save it with password protection +await client.generateKeys({ + saveToFile: true, + keyDir: 'client_keys', + password: process.env.NOMYO_KEY_PASSWORD, +}); + +console.log('Keys generated and saved to client_keys/'); +client.dispose(); +``` + +### Loading Keys Explicitly + +```javascript +import { SecureCompletionClient } from 'nomyo-js'; + +const client = new SecureCompletionClient({ routerUrl: 'https://api.nomyo.ai' }); + +await client.loadKeys( + 'client_keys/private_key.pem', + 'client_keys/public_key.pem', + process.env.NOMYO_KEY_PASSWORD +); + +// Now send requests using the loaded keys +const result = await client.sendSecureRequest( + { model: 'Qwen/Qwen3-0.6B', messages: [{ role: 'user', content: 'Hello' }] }, + crypto.randomUUID() +); +client.dispose(); +``` + +--- + +## Inspecting Memory Protection + +```javascript +import { getMemoryProtectionInfo } from 'nomyo-js'; + +const info = getMemoryProtectionInfo(); + +console.log(`Memory method: ${info.method}`); // 'zero-only' or 'mlock' +console.log(`Can lock: ${info.canLock}`); // true if native addon present +console.log(`Details: ${info.details}`); +``` + +--- + +## TypeScript + +Full type safety out of the box: + +```typescript +import { + SecureChatCompletion, + ChatCompletionRequest, + ChatCompletionResponse, + Message, +} from 'nomyo-js'; + +const client = new SecureChatCompletion({ apiKey: process.env.NOMYO_API_KEY! 
}); + +const messages: Message[] = [ + { role: 'user', content: 'Hello!' }, +]; + +const request: ChatCompletionRequest = { + model: 'Qwen/Qwen3-0.6B', + messages, + temperature: 0.7, +}; + +const response: ChatCompletionResponse = await client.create(request); +const content = response.choices[0].message.content; + +client.dispose(); +``` diff --git a/doc/getting-started.md b/doc/getting-started.md new file mode 100644 index 0000000..d224213 --- /dev/null +++ b/doc/getting-started.md @@ -0,0 +1,279 @@ +# Getting Started + +## Overview + +NOMYO.js provides end-to-end encryption for all communication between your application and NOMYO inference endpoints. Your prompts and responses are encrypted before leaving your process and decrypted only after they arrive back โ€” the server never sees plaintext. + +The API mirrors OpenAI's `ChatCompletion`, making it easy to integrate into existing code. + +> **Note on streaming:** The API is non-streaming. Setting `stream: true` in a request is ignored server-side to maintain full response encryption. + +--- + +## Simple Chat Completion + +```javascript +import { SecureChatCompletion } from 'nomyo-js'; + +const client = new SecureChatCompletion({ + apiKey: process.env.NOMYO_API_KEY, +}); + +const response = await client.create({ + model: 'Qwen/Qwen3-0.6B', + messages: [{ role: 'user', content: 'Hello! How are you today?' }], + temperature: 0.7, +}); + +// Extract what you need, then let the response go out of scope promptly. +// This minimises the time decrypted data lives in process memory +// (reduces exposure from swap files, core dumps, or memory inspection). +const reply = response.choices[0].message.content; +console.log(reply); +``` + +### With a System Message + +```javascript +const response = await client.create({ + model: 'Qwen/Qwen3-0.6B', + messages: [ + { role: 'system', content: 'You are a helpful assistant.' }, + { role: 'user', content: 'What is the capital of France?' 
}, + ], + temperature: 0.7, +}); + +console.log(response.choices[0].message.content); +``` + +--- + +## API Key Authentication + +```javascript +// Constructor-level key (used for all requests from this instance) +const client = new SecureChatCompletion({ + apiKey: process.env.NOMYO_API_KEY, +}); + +// Per-request key override (takes precedence over constructor key) +const response = await client.create({ + model: 'Qwen/Qwen3-0.6B', + messages: [{ role: 'user', content: 'Hello!' }], + api_key: 'override-key-for-this-request', +}); +``` + +--- + +## Security Tiers + +Pass `security_tier` in the request to control hardware routing and isolation level: + + +| Tier | Use case | +| -------------- | ------------------------------------------------------- | +| `"standard"` | General secure inference (GPU) | +| `"high"` | Sensitive business data โ€” enforces secure tokenizer | +| `"maximum"` | HIPAA PHI, classified data โ€” E2EE, maximum isolation | + +```javascript +// Standard โ€” general use +const response = await client.create({ + model: 'Qwen/Qwen3-0.6B', + messages: [{ role: 'user', content: 'General query' }], + security_tier: 'standard', +}); + +// High โ€” sensitive business data +const response2 = await client.create({ + model: 'Qwen/Qwen3-0.6B', + messages: [{ role: 'user', content: 'Summarise this contract clause...' }], + security_tier: 'high', +}); + +// Maximum โ€” PHI / classified data +const response3 = await client.create({ + model: 'Qwen/Qwen3-0.6B', + messages: [{ role: 'user', content: 'Patient record summary...' }], + security_tier: 'maximum', +}); +``` + +> Using `"high"` or `"maximum"` adds latency vs `"standard"` due to additional isolation measures. + +--- + +## Using Tools (Function Calling) + +```javascript +const response = await client.create({ + model: 'Qwen/Qwen3-0.6B', + messages: [{ role: 'user', content: "What's the weather in Paris?" 
}], + tools: [ + { + type: 'function', + function: { + name: 'get_weather', + description: 'Get weather information for a location', + parameters: { + type: 'object', + properties: { + location: { type: 'string', description: 'City name' }, + }, + required: ['location'], + }, + }, + }, + ], + tool_choice: 'auto', + temperature: 0.7, +}); + +const message = response.choices[0].message; +if (message.tool_calls) { + const call = message.tool_calls[0]; + console.log('Tool called:', call.function.name); + console.log('Arguments:', call.function.arguments); +} +``` + +--- + +## Error Handling + +Import typed error classes to distinguish failure modes: + +```javascript +import { + SecureChatCompletion, + AuthenticationError, + RateLimitError, + InvalidRequestError, + ForbiddenError, + ServerError, + ServiceUnavailableError, + APIConnectionError, + SecurityError, +} from 'nomyo-js'; + +const client = new SecureChatCompletion({ apiKey: process.env.NOMYO_API_KEY }); + +try { + const response = await client.create({ + model: 'Qwen/Qwen3-0.6B', + messages: [{ role: 'user', content: 'Hello!' }], + }); + console.log(response.choices[0].message.content); + +} catch (err) { + if (err instanceof AuthenticationError) { + console.error('Bad API key:', err.message); + + } else if (err instanceof RateLimitError) { + // The client already retried automatically (default: 2 retries). + // If you reach here, all retries were exhausted. 
+ console.error('Rate limit exceeded after retries:', err.message); + + } else if (err instanceof ForbiddenError) { + // Model not allowed for the requested security_tier + console.error('Forbidden:', err.message); + + } else if (err instanceof InvalidRequestError) { + console.error('Bad request:', err.message); + + } else if (err instanceof ServerError || err instanceof ServiceUnavailableError) { + console.error('Server error (retries exhausted):', err.message); + + } else if (err instanceof APIConnectionError) { + console.error('Network error (retries exhausted):', err.message); + + } else if (err instanceof SecurityError) { + console.error('Encryption/decryption failure:', err.message); + + } else { + throw err; // re-throw unexpected errors + } +} +``` + +All typed errors expose: + +- `message: string` โ€” human-readable description +- `statusCode?: number` โ€” HTTP status (where applicable) +- `errorDetails?: object` โ€” raw response body (where applicable) + +--- + +## Resource Management + +Always call `dispose()` when you're done with a client to stop the background key-rotation timer and zero in-memory key material: + +```javascript +const client = new SecureChatCompletion({ apiKey: process.env.NOMYO_API_KEY }); + +try { + const response = await client.create({ ... }); + console.log(response.choices[0].message.content); +} finally { + client.dispose(); +} +``` + +For long-running servers (HTTP handlers, daemons), create one client instance and reuse it โ€” don't create a new one per request. + +--- + +## `acreate()` Alias + +`acreate()` is a direct alias for `create()` provided for code that follows the OpenAI naming convention: + +```javascript +const response = await client.acreate({ + model: 'Qwen/Qwen3-0.6B', + messages: [{ role: 'user', content: 'Hello!' }], +}); +``` + +--- + +## Browser Usage + +In browsers, keys are kept in memory only (no file system). Everything else is identical to Node.js. 
+ +```html + +``` + +> **Security note:** Embedding API keys in browser-side code exposes them to end users. In a real application, proxy requests through your backend or use short-lived tokens. + +--- + +## Local Development (HTTP) + +For a local NOMYO router running over plain HTTP: + +```javascript +const client = new SecureChatCompletion({ + baseUrl: 'http://localhost:12435', + allowHttp: true, // required; prints a security warning +}); +``` + +Never use `allowHttp: true` in production. diff --git a/doc/installation.md b/doc/installation.md new file mode 100644 index 0000000..7b757bd --- /dev/null +++ b/doc/installation.md @@ -0,0 +1,107 @@ +# Installation + +## Prerequisites + +- **Node.js**: 14.17 or higher (18 LTS recommended) +- **npm** / **yarn** / **pnpm** +- For TypeScript projects: TypeScript 4.7+ + +## Install from npm + +```bash +npm install nomyo-js +``` + +```bash +yarn add nomyo-js +``` + +```bash +pnpm add nomyo-js +``` + +## Browser (CDN) + +```html + +``` + +--- + +## Verify Installation + +### Node.js + +```javascript +import { SecureChatCompletion, getMemoryProtectionInfo } from 'nomyo-js'; + +const info = getMemoryProtectionInfo(); +console.log('Memory protection:', info.method); // e.g. "zero-only" +console.log('Can lock:', info.canLock); // true if native addon present + +const client = new SecureChatCompletion({ apiKey: 'test' }); +console.log('nomyo-js installed successfully'); +client.dispose(); +``` + +### CommonJS + +```javascript +const { SecureChatCompletion } = require('nomyo-js'); +``` + +## Optional: Native Memory Addon + +The pure-JS implementation zeroes buffers immediately after use but cannot prevent the OS from paging sensitive data to swap. +The optional native addon adds `mlock`/`VirtualLock` support for true OS-level memory locking. 
+ +```bash +cd node_modules/nomyo-js/native +npm install +npm run build +``` + +Or if you installed `nomyo-native` separately: + +```bash +npm install nomyo-native +``` + +When the addon is present `getMemoryProtectionInfo()` will report `method: 'mlock'` and `canLock: true`. + +## TypeScript + +All public APIs ship with bundled type declarations โ€” no `@types/` package required. + +```typescript +import { + SecureChatCompletion, + ChatCompletionRequest, + ChatCompletionResponse, + getMemoryProtectionInfo, +} from 'nomyo-js'; +``` + +## Environment Variables + +Store secrets outside source code: + +```bash +# .env (never commit this file) +NOMYO_API_KEY=your-api-key +NOMYO_SERVER_URL=https://api.nomyo.ai +NOMYO_KEY_PASSWORD=your-key-password +``` + +```javascript +import 'dotenv/config'; // or use process.env directly +import { SecureChatCompletion } from 'nomyo-js'; + +const client = new SecureChatCompletion({ + baseUrl: process.env.NOMYO_SERVER_URL, + apiKey: process.env.NOMYO_API_KEY, +}); +``` diff --git a/doc/models.md b/doc/models.md new file mode 100644 index 0000000..046406e --- /dev/null +++ b/doc/models.md @@ -0,0 +1,85 @@ +# Available Models + +All models are available via `api.nomyo.ai`. Pass the model ID string directly to the `model` field of `create()`. 
+ +## Model List + +| Model ID | Parameters | Type | Notes | +|---|---|---|---| +| `Qwen/Qwen3-0.6B` | 0.6B | General | Lightweight, fast inference | +| `Qwen/Qwen3.5-0.8B` | 0.8B | General | Lightweight, fast inference | +| `LiquidAI/LFM2.5-1.2B-Thinking` | 1.2B | Thinking | Reasoning model | +| `ibm-granite/granite-4.0-h-small` | Small | General | IBM Granite 4.0, enterprise-focused | +| `Qwen/Qwen3.5-9B` | 9B | General | Balanced quality and speed | +| `utter-project/EuroLLM-9B-Instruct-2512` | 9B | General | Multilingual, strong European language support | +| `zai-org/GLM-4.7-Flash` | โ€” | General | Fast GLM variant | +| `mistralai/Ministral-3-14B-Instruct-2512-GGUF` | 14B | General | Mistral instruction-tuned | +| `ServiceNow-AI/Apriel-1.6-15b-Thinker` | 15B | Thinking | Reasoning model | +| `openai/gpt-oss-20b` | 20B | General | OpenAI open-weight release | +| `LiquidAI/LFM2-24B-A2B` | 24B (2B active) | General | MoE โ€” efficient inference | +| `Qwen/Qwen3.5-27B` | 27B | General | High quality, large context | +| `google/medgemma-27b-it` | 27B | Specialized | Medical domain, instruction-tuned | +| `nvidia/NVIDIA-Nemotron-3-Nano-30B-A3B-NVFP4` | 30B (3B active) | General | MoE โ€” efficient inference | +| `Qwen/Qwen3.5-35B-A3B` | 35B (3B active) | General | MoE โ€” efficient inference | +| `moonshotai/Kimi-Linear-48B-A3B-Instruct` | 48B (3B active) | General | MoE โ€” large capacity, efficient inference | + +> **MoE** (Mixture of Experts) models show total/active parameter counts. Only active parameters are used per token, keeping inference cost low relative to total model size. + +## Usage + +```javascript +import { SecureChatCompletion } from 'nomyo-js'; + +const client = new SecureChatCompletion({ apiKey: process.env.NOMYO_API_KEY }); + +const response = await client.create({ + model: 'Qwen/Qwen3.5-9B', + messages: [{ role: 'user', content: 'Hello!' 
}], +}); +``` + +## Choosing a Model + +| Goal | Recommended models | +|------|--------------------| +| **Low latency / edge** | `Qwen/Qwen3-0.6B`, `Qwen/Qwen3.5-0.8B`, `LiquidAI/LFM2.5-1.2B-Thinking` | +| **Balanced quality + speed** | `Qwen/Qwen3.5-9B`, `mistralai/Ministral-3-14B-Instruct-2512-GGUF` | +| **Reasoning / chain-of-thought** | `LiquidAI/LFM2.5-1.2B-Thinking`, `ServiceNow-AI/Apriel-1.6-15b-Thinker` | +| **Multilingual** | `utter-project/EuroLLM-9B-Instruct-2512` | +| **Medical** | `google/medgemma-27b-it` | +| **Highest quality** | `moonshotai/Kimi-Linear-48B-A3B-Instruct`, `Qwen/Qwen3.5-35B-A3B` | + +## Thinking Models + +Models marked **Thinking** return an additional `reasoning_content` field in the response message alongside the normal `content`. This contains the model's internal chain-of-thought: + +```javascript +const response = await client.create({ + model: 'LiquidAI/LFM2.5-1.2B-Thinking', + messages: [{ role: 'user', content: 'Is 9.9 or 9.11 larger?' }], +}); + +const { content, reasoning_content } = response.choices[0].message; +console.log('Reasoning:', reasoning_content); // internal chain-of-thought +console.log('Answer:', content); // final answer +``` + +## Security Tier Compatibility + +Not all models are available on all security tiers. If a model is not permitted for the requested tier, the server returns HTTP 403 and the client throws `ForbiddenError`. + +```javascript +import { ForbiddenError } from 'nomyo-js'; + +try { + const response = await client.create({ + model: 'Qwen/Qwen3.5-27B', + messages: [{ role: 'user', content: '...' 
}], + security_tier: 'maximum', + }); +} catch (err) { + if (err instanceof ForbiddenError) { + // Model not available at this security tier โ€” retry with a different tier or model + } +} +``` diff --git a/doc/rate-limits.md b/doc/rate-limits.md new file mode 100644 index 0000000..1b3bd41 --- /dev/null +++ b/doc/rate-limits.md @@ -0,0 +1,115 @@ +# Rate Limits + +The NOMYO API (`api.nomyo.ai`) enforces rate limits to ensure fair usage and service stability for all users. + +## Default Rate Limit + +By default, each API key is limited to **2 requests per second**. + +## Burst Allowance + +Short bursts above the default limit are permitted. You may send up to **4 requests per second** in burst mode, provided you have not exceeded burst usage within the current **10-second window**. + +Burst capacity is granted once per 10-second window. If you consume the burst allowance, you must wait for the window to reset before burst is available again. + +## Rate Limit Summary + +| Mode | Limit | Condition | +|------|-------|-----------| +| Default | 2 requests/second | Always active | +| Burst | 4 requests/second | Once per 10-second window | + +## Error Responses + +### 429 Too Many Requests + +Returned when your request rate exceeds the allowed limit. + +The client retries automatically (see below). If all retries are exhausted, `RateLimitError` is thrown: + +```javascript +import { SecureChatCompletion, RateLimitError } from 'nomyo-js'; + +try { + const response = await client.create({ ... }); +} catch (err) { + if (err instanceof RateLimitError) { + // All retries exhausted โ€” back off manually before trying again + console.error('Rate limit exceeded:', err.message); + } +} +``` + +### 503 Service Unavailable (Cool-down) + +Returned when burst limits are abused repeatedly. A **30-minute cool-down** is applied to the offending API key. + +**What to do:** Wait 30 minutes before retrying. Review your request patterns to ensure you stay within the permitted limits. 
+ +## Automatic Retry Behaviour + +The client retries automatically on `429`, `500`, `502`, `503`, `504`, and network errors using exponential backoff: + +| Attempt | Delay before attempt | +|---------|----------------------| +| 1st (initial) | — | +| 2nd | 1 second | +| 3rd | 2 seconds | + +The default is **2 retries** (3 total attempts). Adjust per client: + +```javascript +// More retries for high-throughput workloads +const client = new SecureChatCompletion({ + apiKey: process.env.NOMYO_API_KEY, + maxRetries: 5, +}); + +// Disable retries entirely (fail fast) +const client2 = new SecureChatCompletion({ + apiKey: process.env.NOMYO_API_KEY, + maxRetries: 0, +}); +``` + +## Best Practices + +- **Throttle requests client-side** to stay at or below 2 requests/second under normal load. +- **Use burst sparingly** — it is intended for occasional spikes, not sustained high-throughput usage. +- **Increase `maxRetries`** for background jobs that can tolerate extra latency. +- **Monitor for `503` responses** — repeated occurrences indicate your usage pattern is triggering the abuse threshold. +- **Parallel requests** (e.g. `Promise.all`) count against the same rate limit — be careful with large batches.
+ +## Batch Processing Example + +Send requests sequentially and throttle them to stay within the rate limit: + +```javascript +import { SecureChatCompletion } from 'nomyo-js'; + +const client = new SecureChatCompletion({ apiKey: process.env.NOMYO_API_KEY }); + +async function throttledBatch(queries, requestsPerSecond = 2) { + const results = []; + const delayMs = 1000 / requestsPerSecond; + + for (const query of queries) { + const start = Date.now(); + + const response = await client.create({ + model: 'Qwen/Qwen3-0.6B', + messages: [{ role: 'user', content: query }], + }); + results.push(response.choices[0].message.content); + + // Throttle: wait for the remainder of the time slot + const elapsed = Date.now() - start; + if (elapsed < delayMs) { + await new Promise(resolve => setTimeout(resolve, delayMs - elapsed)); + } + } + + client.dispose(); + return results; +} +``` diff --git a/doc/security-guide.md b/doc/security-guide.md new file mode 100644 index 0000000..30e08e3 --- /dev/null +++ b/doc/security-guide.md @@ -0,0 +1,237 @@ +# Security Guide + +## Overview + +NOMYO.js provides end-to-end encryption for all communication between your application and NOMYO inference endpoints. Your prompts and responses are encrypted before leaving your process — the inference server never processes plaintext. + +For the full cryptographic architecture and threat model see [SECURITY.md](../docs/SECURITY.md). + +--- + +## Encryption Mechanism + +### Hybrid Encryption + +Each request uses a two-layer scheme: + +1. **AES-256-GCM** encrypts the payload (authenticated encryption — prevents tampering). +2. **RSA-OAEP-SHA256** wraps the AES key for secure key exchange. + +The server holds the RSA private key; your client generates the AES key fresh for every request. + +### Per-Request Ephemeral AES Keys + +- A new 256-bit AES key is generated for every `create()` call using the Web Crypto API. +- The key is never reused — forward secrecy is ensured per request.
+- The key is zeroed from memory immediately after encryption. + +### Key Exchange + +Your client's RSA public key is sent in the `X-Public-Key` request header. The server encrypts the response with it so only your client can decrypt the reply. + +--- + +## Memory Protection + +### What the Library Does + +All intermediate sensitive buffers (AES key, plaintext payload, decrypted response bytes) are wrapped in `SecureByteContext`. This guarantees they are zeroed in a `finally` block immediately after use, even if an exception occurs. + +The encrypted request body (`ArrayBuffer`) is also zeroed by the Node.js HTTP client after the data is handed to the socket. + +### Limitations (Pure JavaScript) + +JavaScript has no direct access to OS memory management. The library cannot: + +- Lock pages to prevent swapping (`mlock` / `VirtualLock`) +- Prevent the garbage collector from copying data internally +- Guarantee memory won't appear in core dumps + +**Impact:** On a system under memory pressure, sensitive data could briefly be written to swap. For environments where this is unacceptable (PHI, classified), install the optional native addon or run on a system with swap disabled. + +### Native Addon (Optional) + +The `nomyo-native` addon adds true `mlock` support. When installed, `getMemoryProtectionInfo()` reports `method: 'mlock'` and `canLock: true`: + +```javascript +import { getMemoryProtectionInfo } from 'nomyo-js'; + +const info = getMemoryProtectionInfo(); +// Without addon: { method: 'zero-only', canLock: false } +// With addon: { method: 'mlock', canLock: true } +``` + +--- + +## Minimise Response Lifetime + +The library protects all intermediate crypto material in secure memory. However, the **final parsed response object** is returned to your code, and you are responsible for how long it lives. 
+ +```javascript +// GOOD โ€” extract what you need, then drop the response immediately +const response = await client.create({ + model: 'Qwen/Qwen3.5-9B', + messages: [{ role: 'user', content: 'Summarise patient record #1234' }], + security_tier: 'maximum', +}); +const reply = response.choices[0].message.content; +// Let response go out of scope here โ€” don't hold it in a variable +// longer than necessary + +// BAD โ€” holding the full response object in a long-lived scope +this.lastResponse = response; // stored for minutes / hours +``` + +JavaScript's `delete` and variable reassignment do not zero the underlying memory. For sensitive data (PHI, classified), process and discard as quickly as possible โ€” do not store in class attributes, global caches, or log files. + +--- + +## Key Management + +### Default Behaviour + +Keys are automatically generated on first use and saved to `client_keys/` (Node.js). On subsequent runs the saved keys are reloaded automatically. + +``` +client_keys/ + private_key.pem # permissions 0600 (owner-only) + public_key.pem # permissions 0644 +``` + +### Configure the Key Directory + +```javascript +const client = new SecureChatCompletion({ + apiKey: process.env.NOMYO_API_KEY, + keyDir: '/etc/myapp/nomyo-keys', // custom path, outside project directory +}); +``` + +### Password-Protected Keys (Recommended for Production) + +Protect key files with a password so they cannot be used even if the file is leaked: + +```javascript +import { SecureCompletionClient } from 'nomyo-js'; + +const client = new SecureCompletionClient({ routerUrl: 'https://api.nomyo.ai' }); + +await client.generateKeys({ + saveToFile: true, + keyDir: 'client_keys', + password: process.env.NOMYO_KEY_PASSWORD, // minimum 8 characters +}); +``` + +To load password-protected keys manually: + +```javascript +await client.loadKeys( + 'client_keys/private_key.pem', + 'client_keys/public_key.pem', + process.env.NOMYO_KEY_PASSWORD +); +``` + +### Key Rotation + +Keys rotate 
automatically every 24 hours by default. Configure or disable: + +```javascript +const client = new SecureChatCompletion({ + apiKey: process.env.NOMYO_API_KEY, + keyRotationInterval: 3600000, // rotate every hour + keyRotationDir: '/var/lib/myapp/keys', + keyRotationPassword: process.env.KEY_PWD, +}); + +// Or disable entirely for short-lived processes +const client2 = new SecureChatCompletion({ + apiKey: process.env.NOMYO_API_KEY, + keyRotationInterval: 0, +}); +``` + +### File Permissions + +Private key files are saved with `0600` permissions (owner read/write only) on Unix-like systems. Add `client_keys/` and `*.pem` to your `.gitignore` — both are already included if you use this package's default `.gitignore`. + +--- + +## Security Tiers + +| Tier | Hardware | Use case | +|------|----------|----------| +| `"standard"` | GPU | General secure inference | +| `"high"` | CPU/GPU balanced | Sensitive business data, enforces secure tokenizer | +| `"maximum"` | CPU only | HIPAA PHI, classified data — maximum isolation | + +Higher tiers add round-trip latency but increase hardware-level isolation. + +--- + +## HTTPS Enforcement + +The client enforces HTTPS by default. HTTP connections require explicit opt-in and print a visible warning: + +```javascript +// Production — HTTPS only (default) +const client = new SecureChatCompletion({ baseUrl: 'https://api.nomyo.ai' }); + +// Local development — HTTP allowed with explicit flag +const devClient = new SecureChatCompletion({ + baseUrl: 'http://localhost:12435', + allowHttp: true, // prints: "WARNING: Using HTTP instead of HTTPS..." +}); +``` + +Without `allowHttp: true`, connecting over HTTP throws `SecurityError`. + +The server's public key is fetched over HTTPS with TLS certificate verification to prevent man-in-the-middle attacks. + +--- + +## API Key Security + +API keys are sent as `Bearer` tokens in the `Authorization` header. 
The client validates that the key does not contain CR or LF characters to prevent HTTP header injection. + +Never hardcode API keys in source code — use environment variables: + +```javascript +const client = new SecureChatCompletion({ + apiKey: process.env.NOMYO_API_KEY, +}); +``` + +--- + +## Production Checklist + +- [ ] Always use HTTPS (`allowHttp` is `false` by default) +- [ ] Load API key from environment variable, not hardcoded +- [ ] Enable `secureMemory: true` (default) +- [ ] Use password-protected key files (`keyRotationPassword`) +- [ ] Store keys outside the project directory and outside version control +- [ ] Add `client_keys/` and `*.pem` to `.gitignore` +- [ ] Call `client.dispose()` when the client is no longer needed +- [ ] Consider the native addon if swap-file exposure is unacceptable + +--- + +## Compliance Considerations + +### HIPAA + +For Protected Health Information (PHI): +- Use `security_tier: 'maximum'` on requests containing PHI +- Enable password-protected key files +- Ensure HTTPS is enforced (the default) +- Minimise response lifetime in memory (extract, use, discard) + +### Data Classification + +| Classification | Recommended tier | +|---------------|-----------------| +| Public / internal | `"standard"` | +| Confidential business data | `"high"` | +| PHI, PII, classified | `"maximum"` | diff --git a/doc/troubleshooting.md b/doc/troubleshooting.md new file mode 100644 index 0000000..2ac32e1 --- /dev/null +++ b/doc/troubleshooting.md @@ -0,0 +1,314 @@ +# Troubleshooting + +## Authentication Errors + +### `AuthenticationError: Invalid or missing API key` + +The server rejected your API key. + +**Causes and fixes:** + +- Key not set — pass `apiKey` to the constructor or use `process.env.NOMYO_API_KEY`. +- Key has leading/trailing whitespace — check the value with `console.log(JSON.stringify(process.env.NOMYO_API_KEY))` (local debugging only; never log the key in production). 
+- Key contains CR or LF characters — the client rejects keys with `\r` or `\n` and throws `SecurityError` before the request is sent. Regenerate the key. + +```javascript +const client = new SecureChatCompletion({ + apiKey: process.env.NOMYO_API_KEY, // never hardcode +}); +``` + +--- + +## Connection Errors + +### `APIConnectionError: Network error` / `connect ECONNREFUSED` + +The client could not reach the router. + +**Check:** + +1. `baseUrl` is correct — the default is `https://api.nomyo.ai` (no explicit port, i.e. the standard HTTPS port **443**). +2. You have network access to the host. +3. TLS is not being blocked by a proxy or firewall. + +### `SecurityError: HTTPS is required` + +You passed an `http://` URL without setting `allowHttp: true`. + +```javascript +// Local dev only +const client = new SecureChatCompletion({ + baseUrl: 'http://localhost:12435', + allowHttp: true, +}); +``` + +Never set `allowHttp: true` in production — the server public key fetch and all request traffic (including the `Authorization` header) would travel without TLS protection. + +### `APIConnectionError: Request timed out` + +The default timeout is 60 seconds. Larger models or busy endpoints may need more: + +```javascript +const client = new SecureChatCompletion({ + apiKey: process.env.NOMYO_API_KEY, + timeout: 120000, // 2 minutes +}); +``` + +--- + +## Key Loading Failures + +### `Error: Failed to load keys: no such file or directory` + +The `keyDir` directory or the PEM files inside it don't exist. On first run the library generates and saves a new key pair automatically. If you specified a custom `keyDir`, make sure the directory exists and is writable: + +```javascript +const client = new SecureChatCompletion({ + apiKey: process.env.NOMYO_API_KEY, + keyDir: '/var/lib/myapp/nomyo-keys', // directory must exist and be writable +}); +``` + +### `Error: Invalid passphrase` / `Error: Failed to decrypt private key` + +The password you passed to `loadKeys()` or set as `keyRotationPassword` doesn't match the one that was used to encrypt the file. 
+ +```javascript +await client.loadKeys( + 'client_keys/private_key.pem', + 'client_keys/public_key.pem', + process.env.NOMYO_KEY_PASSWORD, // must match the password used with generateKeys() +); +``` + +### `Error: RSA key too small` + +The library enforces a minimum key size of 2048 bits. If you have old 1024-bit keys, regenerate them: + +```javascript +await client.generateKeys({ + saveToFile: true, + keyDir: 'client_keys', + keySize: 4096, // recommended +}); +``` + +### `Error: Failed to load keys` (browser) + +Key loading from files is a Node.js-only feature. In browsers, keys are generated in memory on first use. Do not call `loadKeys()` in a browser context. + +--- + +## Rate Limit Errors + +### `RateLimitError: Rate limit exceeded` + +All automatic retries were exhausted. The default limit is 2 requests/second; burst allows 4 requests/second once per 10-second window. + +**Fixes:** + +- Reduce concurrency — avoid large `Promise.all` batches. +- Add client-side throttling (see [Rate Limits](rate-limits.md)). +- Increase `maxRetries` so the client backs off longer before giving up: + +```javascript +const client = new SecureChatCompletion({ + apiKey: process.env.NOMYO_API_KEY, + maxRetries: 5, +}); +``` + +### `ServiceUnavailableError` with 30-minute cool-down + +Burst limits were hit repeatedly and a cool-down was applied to your key. Wait 30 minutes, then review your request patterns. + +--- + +## Model / Tier Errors + +### `ForbiddenError: Model not allowed for this security tier` + +The model you requested is not available at the security tier you specified. Try a lower tier or a different model: + +```javascript +// If 'maximum' tier rejects the model, try 'high' or 'standard' +const response = await client.create({ + model: 'Qwen/Qwen3.5-27B', + messages: [...], + security_tier: 'high', // try 'standard' if still rejected +}); +``` + +See [Models — Security Tier Compatibility](models.md#security-tier-compatibility) for details. 
+ +--- + +## Crypto / Security Errors + +### `SecurityError: Decryption failed` + +The response could not be decrypted. The error message is intentionally vague to avoid leaking crypto details. + +**Possible causes:** + +- The server returned a malformed response (check `debug: true` output). +- A network proxy modified the response body. +- The server's public key changed mid-session — the next request will re-fetch it automatically. + +Enable debug mode to log the raw response and narrow down the cause: + +```javascript +const client = new SecureChatCompletion({ + apiKey: process.env.NOMYO_API_KEY, + debug: true, +}); +``` + +### `Error: Unsupported protocol version` / `Error: Unsupported encryption algorithm` + +The server sent a response in a protocol version or with an encryption algorithm not supported by this client version. Update the package: + +```bash +npm update nomyo-js +``` + +--- + +## `DisposedError`: Method called after `dispose()` + +You called a method on a client that has already been disposed. + +```javascript +client.dispose(); +await client.create(...); // throws DisposedError +``` + +Create a new client instance if you need to make more requests after disposal. + +--- + +## Memory Protection Warnings + +### `getMemoryProtectionInfo()` returns `method: 'zero-only'` + +This is normal for a pure JavaScript installation. The library zeroes sensitive buffers immediately after use but cannot lock pages to prevent swapping (OS `mlock` requires a native addon). + +```javascript +import { getMemoryProtectionInfo } from 'nomyo-js'; + +const info = getMemoryProtectionInfo(); +// { method: 'zero-only', canLock: false, isPlatformSecure: false } +``` + +For environments where swap-file exposure is unacceptable (HIPAA PHI, classified data), install the optional `nomyo-native` addon or run on a system with swap disabled. 
+ +--- + +## Node.js-Specific Issues + +### `ReferenceError: crypto is not defined` + +In CommonJS modules on Node.js before v19, `crypto` is not a global. Import it explicitly: + +```javascript +// CommonJS +const { webcrypto } = require('crypto'); +global.crypto = webcrypto; + +// Or switch to ES modules (recommended) +// package.json: "type": "module" +``` + +The library itself imports `crypto` correctly — this error only appears if your own application code tries to use `crypto` directly. + +### `SyntaxError: Cannot use import statement outside a module` / CommonJS vs ESM + +The package ships both CommonJS (`dist/node/`) and ESM (`dist/esm/`) builds. Node.js selects the correct one automatically via `package.json` `exports`. If you see import errors, check that your `package.json` or bundler is not forcing the wrong format. + +For ESM: set `"type": "module"` in your `package.json` or use `.mjs` file extensions. +For CommonJS: use `require('nomyo-js')` or `.cjs` extensions. + +### TypeScript: `Cannot find module 'nomyo-js'` / missing types + +Ensure your `tsconfig.json` includes `"moduleResolution": "bundler"` or `"moduleResolution": "node16"` and that `nomyo-js` is in `dependencies` (not just `devDependencies`): + +```bash +npm install nomyo-js +``` + +--- + +## Browser-Specific Issues + +### `Content Security Policy blocked` + +If your app's CSP restricts `script-src` or `connect-src`, add the NOMYO API domain: + +``` +Content-Security-Policy: connect-src https://api.nomyo.ai; +``` + +### `TypeError: Failed to fetch` (CORS) + +The NOMYO API includes CORS headers. If you see CORS errors in a browser, verify the `baseUrl` is correct (HTTPS, correct port) and that no browser extension is blocking the request. + +### Keys not persisted across page reloads + +This is expected behaviour — browsers do not have file system access. Keys are generated fresh on each page load. 
If you need persistent keys in a browser context, implement your own `loadKeys`/`generateKeys` wrapper using `localStorage` or `IndexedDB` (not recommended for high-security scenarios). + +--- + +## Debugging Tips + +### Enable verbose logging + +```javascript +const client = new SecureChatCompletion({ + apiKey: process.env.NOMYO_API_KEY, + debug: true, +}); +``` + +Debug mode logs: key generation/loading, server public key fetches, request encryption details, retry attempts, and response decryption. + +### Check memory protection status + +```javascript +import { getMemoryProtectionInfo } from 'nomyo-js'; +console.log(getMemoryProtectionInfo()); +``` + +### Inspect response metadata + +The `_metadata` field in every response carries server-side diagnostics: + +```javascript +const response = await client.create({ ... }); +console.log(response._metadata); +// { +// payload_id: '...', +// is_encrypted: true, +// encryption_algorithm: 'hybrid-aes256-rsa4096', +// security_tier: 'standard', +// memory_protection: { ... }, +// } +``` + +### Test with minimum configuration + +Strip all optional configuration and test with the simplest possible call to isolate the issue: + +```javascript +import { SecureChatCompletion } from 'nomyo-js'; + +const client = new SecureChatCompletion({ apiKey: process.env.NOMYO_API_KEY }); +const r = await client.create({ + model: 'Qwen/Qwen3-0.6B', + messages: [{ role: 'user', content: 'ping' }], +}); +console.log(r.choices[0].message.content); +client.dispose(); +``` diff --git a/docs/SECURITY.md b/docs/SECURITY.md index 3029488..e229b6a 100644 --- a/docs/SECURITY.md +++ b/docs/SECURITY.md @@ -143,7 +143,7 @@ const client = new SecureChatCompletion({ baseUrl: 'https://...' 
}); # .env file (never commit to git) NOMYO_API_KEY=your-api-key NOMYO_KEY_PASSWORD=your-key-password -NOMYO_SERVER_URL=https://api.nomyo.ai:12434 +NOMYO_SERVER_URL=https://api.nomyo.ai ``` --- diff --git a/examples/node/basic.js b/examples/node/basic.js index c29f530..72e54fb 100644 --- a/examples/node/basic.js +++ b/examples/node/basic.js @@ -7,7 +7,7 @@ import { SecureChatCompletion } from 'nomyo-js'; async function main() { // Initialize client const client = new SecureChatCompletion({ - baseUrl: 'https://api.nomyo.ai:12434', + baseUrl: 'https://api.nomyo.ai', // For local development, use: // baseUrl: 'http://localhost:12434', // allowHttp: true diff --git a/examples/node/with-tools.js b/examples/node/with-tools.js index 937eac9..aa15b72 100644 --- a/examples/node/with-tools.js +++ b/examples/node/with-tools.js @@ -6,7 +6,7 @@ import { SecureChatCompletion } from 'nomyo-js'; async function main() { const client = new SecureChatCompletion({ - baseUrl: 'https://api.nomyo.ai:12434' + baseUrl: 'https://api.nomyo.ai' }); try { diff --git a/src/api/SecureChatCompletion.ts b/src/api/SecureChatCompletion.ts index dd36c67..0f18018 100644 --- a/src/api/SecureChatCompletion.ts +++ b/src/api/SecureChatCompletion.ts @@ -15,7 +15,7 @@ export class SecureChatCompletion { constructor(config: ChatCompletionConfig = {}) { const { - baseUrl = 'https://api.nomyo.ai:12435', + baseUrl = 'https://api.nomyo.ai', allowHttp = false, apiKey, secureMemory = true, diff --git a/src/core/SecureCompletionClient.ts b/src/core/SecureCompletionClient.ts index 1edb1cd..1ed7872 100644 --- a/src/core/SecureCompletionClient.ts +++ b/src/core/SecureCompletionClient.ts @@ -79,9 +79,9 @@ export class SecureCompletionClient { // Promise-based mutex: serialises concurrent ensureKeys() calls private ensureKeysLock: Promise = Promise.resolve(); - constructor(config: ClientConfig = { routerUrl: 'https://api.nomyo.ai:12435' }) { + constructor(config: ClientConfig = { routerUrl: 'https://api.nomyo.ai' }) { 
const { - routerUrl = 'https://api.nomyo.ai:12435', + routerUrl = 'https://api.nomyo.ai', allowHttp = false, secureMemory = true, keySize = 4096, diff --git a/src/types/client.ts b/src/types/client.ts index 342e6f4..5422ea7 100644 --- a/src/types/client.ts +++ b/src/types/client.ts @@ -3,7 +3,7 @@ */ export interface ClientConfig { - /** Base URL of the NOMYO router (e.g., https://api.nomyo.ai:12434) */ + /** Base URL of the NOMYO router (e.g., https://api.nomyo.ai) */ routerUrl: string; /** Allow HTTP connections (ONLY for local development, never in production) */