package ai.nomyo; import ai.nomyo.errors.*; import lombok.Getter; import java.util.List; import java.util.Map; /** * High-level OpenAI-compatible entrypoint for the NOMYO secure API. * *
 * <p>This class provides a familiar API surface matching {@code openai.ChatCompletion.create()}.
 * All requests are automatically encrypted using hybrid AES-256-GCM + RSA-4096 encryption
 * before being sent to the NOMYO router.
 *
 * <pre>{@code
 * SecureChatCompletion client = new SecureChatCompletion(
 *     "https://api.nomyo.ai",
 *     false,
 *     "your-api-key",
 *     true,
 *     "/path/to/keys",
 *     2
 * );
 *
 * Map response = client.create(
 *     "Qwen/Qwen3-0.6B",
 *     List.of(Map.of("role", "user", "content", "Hello, world!"))
 * );
 * }</pre>
*
 * <p>Streaming is not supported. The server rejects streaming requests with HTTP 400.
 * Always use {@code stream=false} (the default).
 *
 * <p>The {@code securityTier} parameter controls the hardware isolation level:
 * {@code "standard"}, {@code "high"}, or {@code "maximum"}.
 *
 * <p>Set {@code keyDir} to a directory path to persist RSA keys to disk.
 * Keys are generated on first use and reused across all calls.
 * Set {@code keyDir} to {@code null} for ephemeral keys (in-memory only, lost on restart).
*/ @Getter public class SecureChatCompletion { private final SecureCompletionClient client; private final String apiKey; private final String keyDir; /** * Constructs a {@code SecureChatCompletion} with default settings. * *Uses the default NOMYO router URL ({@code https://api.nomyo.ai}), * HTTPS-only, secure memory enabled, ephemeral keys, and 2 retries.
*/ public SecureChatCompletion() { this(Constants.DEFAULT_BASE_URL, false, null, true, null, Constants.DEFAULT_MAX_RETRIES); } /** * Constructs a {@code SecureChatCompletion} with the specified settings. * * @param baseUrl NOMYO Router base URL (HTTPS enforced unless {@code allowHttp} is {@code true}) * @param allowHttp permit {@code http://} URLs (development only) * @param apiKey Bearer token for authentication (can also be passed per-call via {@link #create}) * @param secureMemory enable memory locking/zeroing (warns if unavailable) * @param keyDir directory to persist RSA keys; {@code null} = ephemeral (in-memory only) * @param maxRetries retries on 429/500/502/503/504 + network errors (exponential backoff: 1s, 2s, 4s…) */ public SecureChatCompletion( String baseUrl, boolean allowHttp, String apiKey, boolean secureMemory, String keyDir, int maxRetries ) { this.client = new SecureCompletionClient(baseUrl, allowHttp, secureMemory, maxRetries); this.apiKey = apiKey; this.keyDir = keyDir; } /** * Creates a chat completion with the specified parameters. * *This is the main entrypoint, with the same signature as * {@code openai.ChatCompletion.create()}. Returns a map (not an object) * containing the OpenAI-compatible response.
* *| Param | Type | Required | Description |
|---|---|---|---|
| {@code model} | {@code String} | yes | Model identifier, e.g. "Qwen/Qwen3-0.6B" |
| {@code messages} | {@code List} | yes | OpenAI-format messages |
| {@code temperature} | {@code Double} | no | 0–2 |
| {@code maxTokens} | {@code Integer} | no | Maximum tokens in response |
| {@code topP} | {@code Double} | no | Top-p sampling parameter |
| {@code stop} | {@code String | List} | no | Stop sequences |
| {@code presencePenalty} | {@code Double} | no | -2.0 to 2.0 |
| {@code frequencyPenalty} | {@code Double} | no | -2.0 to 2.0 |
| {@code n} | {@code Integer} | no | Number of completions |
| {@code bestOf} | {@code Integer} | no | |
| {@code seed} | {@code Integer} | no | Reproducibility seed |
| {@code logitBias} | {@code Map} | no | Token bias map |
| {@code user} | {@code String} | no | End-user identifier |
| {@code tools} | {@code List} | no | Tool definitions passed through to llama.cpp |
| {@code toolChoice} | {@code String} | no | "auto", "none", or specific tool name |
| {@code responseFormat} | {@code Map} | no | {"type": "json_object"} or {"type": "json_schema", ...} |
| {@code stream} | {@code Boolean} | no | NOT supported. Server rejects with HTTP 400. Always use {@code false}. |
| {@code baseUrl} | {@code String} | no | Per-call override (creates temp client internally) |
| {@code securityTier} | {@code String} | no | "standard", "high", or "maximum". Invalid values raise {@code ValueError}. |
| {@code apiKey} | {@code String} | no | Per-call override of instance {@code apiKey}. |