fix: base_url

doc: created
2026-04-16 16:44:26 +02:00 · 2026-04-16 16:44:26 +02:00 · 43165f86f2
commit 43165f86f2
parent 6e02559f4e
17 changed files with 2151 additions and 293 deletions
--- a/doc/getting-started.md
+++ b/doc/getting-started.md
@@ -0,0 +1,279 @@
+# Getting Started
+
+## Overview
+
+NOMYO.js provides end-to-end encryption for all communication between your application and NOMYO inference endpoints. Your prompts are encrypted before they leave your process, and responses are decrypted only after they arrive back in it — the server never sees plaintext.
+
+The API mirrors OpenAI's `ChatCompletion`, making it easy to integrate into existing code.
+
+> **Note on streaming:** The API is non-streaming. Setting `stream: true` in a request is ignored server-side to maintain full response encryption.
+
+---
+
+## Simple Chat Completion
+
+```javascript
+import { SecureChatCompletion } from 'nomyo-js';
+
+const client = new SecureChatCompletion({
+  apiKey: process.env.NOMYO_API_KEY,
+});
+
+const response = await client.create({
+  model: 'Qwen/Qwen3-0.6B',
+  messages: [{ role: 'user', content: 'Hello! How are you today?' }],
+  temperature: 0.7,
+});
+
+// Extract what you need, then let the response go out of scope promptly.
+// This minimises the time decrypted data lives in process memory
+// (reduces exposure from swap files, core dumps, or memory inspection).
+const reply = response.choices[0].message.content;
+console.log(reply);
+```
+
+### With a System Message
+
+```javascript
+const response = await client.create({
+  model: 'Qwen/Qwen3-0.6B',
+  messages: [
+    { role: 'system', content: 'You are a helpful assistant.' },
+    { role: 'user',   content: 'What is the capital of France?' },
+  ],
+  temperature: 0.7,
+});
+
+console.log(response.choices[0].message.content);
+```
+
+---
+
+## API Key Authentication
+
+```javascript
+// Constructor-level key (used for all requests from this instance)
+const client = new SecureChatCompletion({
+  apiKey: process.env.NOMYO_API_KEY,
+});
+
+// Per-request key override (takes precedence over constructor key)
+const response = await client.create({
+  model: 'Qwen/Qwen3-0.6B',
+  messages: [{ role: 'user', content: 'Hello!' }],
+  api_key: 'override-key-for-this-request',
+});
+```
+
+---
+
+## Security Tiers
+
+Pass `security_tier` in the request to control hardware routing and isolation level:
+
+
+| Tier         | Use case                                              |
+| -------------- | ------------------------------------------------------- |
+| `"standard"` | General secure inference (GPU)                        |
+| `"high"`     | Sensitive business data — enforces secure tokenizer  |
+| `"maximum"`  | HIPAA PHI, classified data — E2EE, maximum isolation |
+
+```javascript
+// Standard — general use
+const response = await client.create({
+  model: 'Qwen/Qwen3-0.6B',
+  messages: [{ role: 'user', content: 'General query' }],
+  security_tier: 'standard',
+});
+
+// High — sensitive business data
+const response2 = await client.create({
+  model: 'Qwen/Qwen3-0.6B',
+  messages: [{ role: 'user', content: 'Summarise this contract clause...' }],
+  security_tier: 'high',
+});
+
+// Maximum — PHI / classified data
+const response3 = await client.create({
+  model: 'Qwen/Qwen3-0.6B',
+  messages: [{ role: 'user', content: 'Patient record summary...' }],
+  security_tier: 'maximum',
+});
+```
+
+> Using `"high"` or `"maximum"` adds latency compared with `"standard"` because of the additional isolation measures.
+
+---
+
+## Using Tools (Function Calling)
+
+```javascript
+const response = await client.create({
+  model: 'Qwen/Qwen3-0.6B',
+  messages: [{ role: 'user', content: "What's the weather in Paris?" }],
+  tools: [
+    {
+      type: 'function',
+      function: {
+        name: 'get_weather',
+        description: 'Get weather information for a location',
+        parameters: {
+          type: 'object',
+          properties: {
+            location: { type: 'string', description: 'City name' },
+          },
+          required: ['location'],
+        },
+      },
+    },
+  ],
+  tool_choice: 'auto',
+  temperature: 0.7,
+});
+
+const message = response.choices[0].message;
+if (message.tool_calls) {
+  const call = message.tool_calls[0];
+  console.log('Tool called:', call.function.name);
+  console.log('Arguments:', call.function.arguments);
+}
+```
+
+---
+
+## Error Handling
+
+Import typed error classes to distinguish failure modes:
+
+```javascript
+import {
+  SecureChatCompletion,
+  AuthenticationError,
+  RateLimitError,
+  InvalidRequestError,
+  ForbiddenError,
+  ServerError,
+  ServiceUnavailableError,
+  APIConnectionError,
+  SecurityError,
+} from 'nomyo-js';
+
+const client = new SecureChatCompletion({ apiKey: process.env.NOMYO_API_KEY });
+
+try {
+  const response = await client.create({
+    model: 'Qwen/Qwen3-0.6B',
+    messages: [{ role: 'user', content: 'Hello!' }],
+  });
+  console.log(response.choices[0].message.content);
+
+} catch (err) {
+  if (err instanceof AuthenticationError) {
+    console.error('Bad API key:', err.message);
+
+  } else if (err instanceof RateLimitError) {
+    // The client already retried automatically (default: 2 retries).
+    // If you reach here, all retries were exhausted.
+    console.error('Rate limit exceeded after retries:', err.message);
+
+  } else if (err instanceof ForbiddenError) {
+    // Model not allowed for the requested security_tier
+    console.error('Forbidden:', err.message);
+
+  } else if (err instanceof InvalidRequestError) {
+    console.error('Bad request:', err.message);
+
+  } else if (err instanceof ServerError || err instanceof ServiceUnavailableError) {
+    console.error('Server error (retries exhausted):', err.message);
+
+  } else if (err instanceof APIConnectionError) {
+    console.error('Network error (retries exhausted):', err.message);
+
+  } else if (err instanceof SecurityError) {
+    console.error('Encryption/decryption failure:', err.message);
+
+  } else {
+    throw err;   // re-throw unexpected errors
+  }
+}
+```
+
+All typed errors expose:
+
+- `message: string` — human-readable description
+- `statusCode?: number` — HTTP status (where applicable)
+- `errorDetails?: object` — raw response body (where applicable)
+
+---
+
+## Resource Management
+
+Always call `dispose()` when you're done with a client to stop the background key-rotation timer and zero in-memory key material:
+
+```javascript
+const client = new SecureChatCompletion({ apiKey: process.env.NOMYO_API_KEY });
+
+try {
+  const response = await client.create({ /* model, messages, ... */ });
+  console.log(response.choices[0].message.content);
+} finally {
+  client.dispose();
+}
+```
+
+For long-running servers (HTTP handlers, daemons), create one client instance and reuse it — don't create a new one per request. In that case, call `dispose()` once, when the server shuts down, rather than after each request.
+
+---
+
+## `acreate()` Alias
+
+`acreate()` is a direct alias for `create()` provided for code that follows the OpenAI naming convention:
+
+```javascript
+const response = await client.acreate({
+  model: 'Qwen/Qwen3-0.6B',
+  messages: [{ role: 'user', content: 'Hello!' }],
+});
+```
+
+---
+
+## Browser Usage
+
+In browsers, keys are kept in memory only, because there is no file system to persist them to. Everything else is identical to Node.js.
+
+```html
+<script type="module">
+  import { SecureChatCompletion } from 'https://unpkg.com/nomyo-js/dist/browser/index.js';
+
+  const client = new SecureChatCompletion({
+    baseUrl: 'https://api.nomyo.ai',
+    apiKey: 'your-api-key',
+  });
+
+  const response = await client.create({
+    model: 'Qwen/Qwen3-0.6B',
+    messages: [{ role: 'user', content: 'Hello from the browser!' }],
+  });
+
+  console.log(response.choices[0].message.content);
+  client.dispose();
+</script>
+```
+
+> **Security note:** Embedding API keys in browser-side code exposes them to end users. In a real application, proxy requests through your backend or use short-lived tokens.
+
+---
+
+## Local Development (HTTP)
+
+For a local NOMYO router running over plain HTTP:
+
+```javascript
+const client = new SecureChatCompletion({
+  baseUrl: 'http://localhost:12435',
+  allowHttp: true,   // required; prints a security warning
+});
+```
+
+Never use `allowHttp: true` in production.