Initial commit

2026-04-21 17:24:11 +02:00 · 2026-04-21 17:24:11 +02:00 · 8acf584d28
commit 8acf584d28
24 changed files with 2408 additions and 0 deletions
--- a/src/main/java/ai/nomyo/SecureChatCompletion.java
+++ b/src/main/java/ai/nomyo/SecureChatCompletion.java
@@ -0,0 +1,188 @@
+package ai.nomyo;
+
+import ai.nomyo.errors.*;
+import lombok.Getter;
+
+import java.util.List;
+import java.util.Map;
+
+/**
+ * High-level OpenAI-compatible entrypoint for the NOMYO secure API.
+ *
+ * <p>This class provides a familiar API surface matching {@code openai.ChatCompletion.create()}.
+ * All requests are automatically encrypted using hybrid AES-256-GCM + RSA-4096 encryption
+ * before being sent to the NOMYO router.</p>
+ *
+ * <h3>Usage</h3>
+ * <pre>{@code
+ * try (SecureChatCompletion client = new SecureChatCompletion(
+ *         "https://api.nomyo.ai",  // baseUrl
+ *         false,                   // allowHttp
+ *         "your-api-key",          // apiKey
+ *         true,                    // secureMemory
+ *         "/path/to/keys",         // keyDir
+ *         2)) {                    // maxRetries
+ *     Map<String, Object> response = client.create(
+ *         "Qwen/Qwen3-0.6B",
+ *         List.of(Map.of("role", "user", "content", "Hello, world!")));
+ * }
+ * }</pre>
+ *
+ * <h3>Streaming</h3>
+ * <p>Streaming is <b>not supported</b>. The server rejects streaming requests with HTTP 400.
+ * Always use {@code stream=false} (the default).</p>
+ *
+ * <h3>Security Tiers</h3>
+ * <p>The {@code securityTier} parameter controls the hardware isolation level:</p>
+ * <ul>
+ *   <li>{@code "standard"} — GPU inference (general secure inference)</li>
+ *   <li>{@code "high"} — CPU/GPU (sensitive business data)</li>
+ *   <li>{@code "maximum"} — CPU only (PHI, classified data)</li>
+ * </ul>
+ *
+ * <h3>Key Persistence</h3>
+ * <p>Set {@code keyDir} to a directory path to persist RSA keys to disk.
+ * Keys are generated on first use and reused across all calls.
+ * Set {@code keyDir} to {@code null} for ephemeral keys (in-memory only, lost on restart).</p>
+ *
+ * <h3>Resource Management</h3>
+ * <p>Instances are {@link AutoCloseable}: {@link #close()} closes the underlying client.</p>
+ */
+@Getter
+public class SecureChatCompletion implements AutoCloseable {
+
+    private final SecureCompletionClient client;
+    private final String apiKey; // NOTE(review): class-level @Getter also generates a public getApiKey() — confirm exposing the secret is intended
+    private final String keyDir;
+
+    /**
+     * Constructs a {@code SecureChatCompletion} with default settings.
+     *
+     * <p>Uses {@code Constants.DEFAULT_BASE_URL} with plain HTTP disallowed, no API key, secure memory
+     * enabled, ephemeral keys ({@code keyDir == null}), and {@code Constants.DEFAULT_MAX_RETRIES} retries.</p>
+     */
+    public SecureChatCompletion() {
+        this(Constants.DEFAULT_BASE_URL, false, null, true, null, Constants.DEFAULT_MAX_RETRIES);
+    }
+
+    /**
+     * Constructs a {@code SecureChatCompletion} with the specified settings.
+     *
+     * @param baseUrl       NOMYO Router base URL (HTTPS enforced unless {@code allowHttp} is {@code true})
+     * @param allowHttp     permit {@code http://} URLs (development only)
+     * @param apiKey        Bearer token for authentication (can also be passed per-call via {@link #create})
+     * @param secureMemory  enable memory locking/zeroing (warns if unavailable)
+     * @param keyDir        directory to persist RSA keys; {@code null} = ephemeral (in-memory only).
+     *                      NOTE(review): stored on this instance only; not passed to the client constructor — confirm wiring
+     * @param maxRetries    retries on 429/500/502/503/504 + network errors (exponential backoff: 1s, 2s, 4s…)
+     */
+    public SecureChatCompletion(
+            String baseUrl,
+            boolean allowHttp,
+            String apiKey,
+            boolean secureMemory,
+            String keyDir,
+            int maxRetries
+    ) {
+        this.client = new SecureCompletionClient(baseUrl, allowHttp, secureMemory, maxRetries);
+        this.apiKey = apiKey;
+        this.keyDir = keyDir;
+    }
+
+    /**
+     * Creates a chat completion with the specified parameters.
+     *
+     * <p>This is the main entrypoint, mirroring {@code openai.ChatCompletion.create()}. Returns a map
+     * (not an object) containing the OpenAI-compatible response.</p>
+     *
+     * <p><b>Status:</b> not implemented yet — the body is a stub that ignores its arguments and returns {@code null}.</p>
+     *
+     * <h3>Parameters</h3>
+     * <table>
+     *   <tr><th>Param</th><th>Type</th><th>Required</th><th>Description</th></tr>
+     *   <tr><td>{@code model}</td><td>{@code String}</td><td>yes</td><td>Model identifier, e.g. "Qwen/Qwen3-0.6B"</td></tr>
+     *   <tr><td>{@code messages}</td><td>{@code List<Map>}</td><td>yes</td><td>OpenAI-format messages</td></tr>
+     *   <tr><td>{@code temperature}</td><td>{@code Double}</td><td>no</td><td>0–2</td></tr>
+     *   <tr><td>{@code maxTokens}</td><td>{@code Integer}</td><td>no</td><td>Maximum tokens in response</td></tr>
+     *   <tr><td>{@code topP}</td><td>{@code Double}</td><td>no</td><td>Top-p sampling parameter</td></tr>
+     *   <tr><td>{@code stop}</td><td>{@code String | List<String>}</td><td>no</td><td>Stop sequences</td></tr>
+     *   <tr><td>{@code presencePenalty}</td><td>{@code Double}</td><td>no</td><td>-2.0 to 2.0</td></tr>
+     *   <tr><td>{@code frequencyPenalty}</td><td>{@code Double}</td><td>no</td><td>-2.0 to 2.0</td></tr>
+     *   <tr><td>{@code n}</td><td>{@code Integer}</td><td>no</td><td>Number of completions</td></tr>
+     *   <tr><td>{@code bestOf}</td><td>{@code Integer}</td><td>no</td><td></td></tr>
+     *   <tr><td>{@code seed}</td><td>{@code Integer}</td><td>no</td><td>Reproducibility seed</td></tr>
+     *   <tr><td>{@code logitBias}</td><td>{@code Map<String, Double>}</td><td>no</td><td>Token bias map</td></tr>
+     *   <tr><td>{@code user}</td><td>{@code String}</td><td>no</td><td>End-user identifier</td></tr>
+     *   <tr><td>{@code tools}</td><td>{@code List<Map>}</td><td>no</td><td>Tool definitions passed through to llama.cpp</td></tr>
+     *   <tr><td>{@code toolChoice}</td><td>{@code String}</td><td>no</td><td>"auto", "none", or specific tool name</td></tr>
+     *   <tr><td>{@code responseFormat}</td><td>{@code Map}</td><td>no</td><td>{"type": "json_object"} or {"type": "json_schema", ...}</td></tr>
+     *   <tr><td>{@code stream}</td><td>{@code Boolean}</td><td>no</td><td><b>NOT supported.</b> Server rejects with HTTP 400. Always use {@code false}.</td></tr>
+     *   <tr><td>{@code baseUrl}</td><td>{@code String}</td><td>no</td><td>Per-call override (creates temp client internally)</td></tr>
+     *   <tr><td>{@code securityTier}</td><td>{@code String}</td><td>no</td><td>"standard", "high", or "maximum"; any other value is invalid.</td></tr>
+     *   <tr><td>{@code apiKey}</td><td>{@code String}</td><td>no</td><td>Per-call override of instance {@code apiKey}.</td></tr>
+     * </table>
+     *
+     * @param model     model identifier (required)
+     * @param messages  OpenAI-format message list (required)
+     * @param kwargs    additional OpenAI-compatible parameters; may be {@code null} (the two-argument overload passes {@code null})
+     * @return OpenAI-compatible response map (see §6.2 of reference docs)
+     * @throws SecurityError       if encryption/decryption fails
+     * @throws APIConnectionError  if a network error occurs
+     * @throws InvalidRequestError if the API returns 400
+     * @throws AuthenticationError if the API returns 401
+     * @throws ForbiddenError      if the API returns 403
+     * @throws RateLimitError      if the API returns 429
+     * @throws ServerError         if the API returns 500
+     * @throws ServiceUnavailableError if the API returns 503
+     * @throws APIError            for other errors
+     */
+    public Map<String, Object> create(String model, List<Map<String, Object>> messages, Map<String, Object> kwargs) {
+        // TODO: not implemented. Planned: build the payload from model, messages and kwargs; reject stream=true;
+        // validate securityTier if provided; prefer a per-call apiKey over the instance apiKey; create a temp
+        // client if a baseUrl override is given; send the secure request; return the decrypted response map.
+        return null;
+    }
+
+    /**
+     * Creates a chat completion with the specified model and messages.
+     * Convenience variant that passes {@code null} for {@code kwargs}.
+     *
+     * @param model     model identifier (required)
+     * @param messages  OpenAI-format message list (required)
+     * @return the result of {@link #create(String, List, Map)}
+     */
+    public Map<String, Object> create(String model, List<Map<String, Object>> messages) {
+        return create(model, messages, null);
+    }
+
+    /**
+     * Synchronous alias for {@link #create(String, List, Map)} (despite the async-style name); identical behavior.
+     *
+     * @param model     model identifier (required)
+     * @param messages  OpenAI-format message list (required)
+     * @param kwargs    additional OpenAI-compatible parameters
+     * @return the result of {@link #create(String, List, Map)}
+     */
+    public Map<String, Object> acreate(String model, List<Map<String, Object>> messages, Map<String, Object> kwargs) {
+        return create(model, messages, kwargs);
+    }
+
+    /**
+     * Synchronous alias for {@link #create(String, List)} (despite the async-style name); identical behavior.
+     *
+     * @param model     model identifier (required)
+     * @param messages  OpenAI-format message list (required)
+     * @return the result of {@link #create(String, List)}
+     */
+    public Map<String, Object> acreate(String model, List<Map<String, Object>> messages) {
+        return create(model, messages);
+    }
+
+    /**
+     * Closes the underlying client; implements {@link AutoCloseable#close()} so instances work in try-with-resources.
+     */
+    @Override
+    public void close() {
+        client.close();
+    }
+}