nomyo4J/src/main/java/ai/nomyo/SecureChatCompletion.java

189 lines
8.6 KiB
Java
Raw Normal View History

2026-04-21 17:24:11 +02:00
package ai.nomyo;
import ai.nomyo.errors.*;
import lombok.Getter;
import java.util.List;
import java.util.Map;
/**
 * High-level OpenAI-compatible entrypoint for the NOMYO secure API.
 *
 * <p>This class provides a familiar API surface matching {@code openai.ChatCompletion.create()}.
 * All requests are intended to be encrypted using hybrid AES-256-GCM + RSA-4096 encryption
 * before being sent to the NOMYO router.</p>
 *
 * <p><b>Implementation status:</b> {@link #create(String, List, Map)} is currently a stub that
 * returns {@code null}; the request pipeline described below is the intended contract.</p>
 *
 * <h3>Usage</h3>
 * <p>The class implements {@link AutoCloseable}, so it can be used in a
 * try-with-resources statement to guarantee that the underlying client is closed.</p>
 * <pre>{@code
 * try (SecureChatCompletion client = new SecureChatCompletion(
 *         "https://api.nomyo.ai",
 *         false,
 *         "your-api-key",
 *         true,
 *         "/path/to/keys",
 *         2)) {
 *
 *     Map<String, Object> response = client.create(
 *         "Qwen/Qwen3-0.6B",
 *         List.of(Map.of("role", "user", "content", "Hello, world!"))
 *     );
 * }
 * }</pre>
 *
 * <h3>Streaming</h3>
 * <p>Streaming is <b>not supported</b>. The server rejects streaming requests with HTTP 400.
 * Always use {@code stream=false} (the default).</p>
 *
 * <h3>Security Tiers</h3>
 * <p>The {@code security_tier} parameter controls the hardware isolation level:</p>
 * <ul>
 * <li>{@code "standard"} - GPU inference (general secure inference)</li>
 * <li>{@code "high"} - CPU/GPU (sensitive business data)</li>
 * <li>{@code "maximum"} - CPU only (PHI, classified data)</li>
 * </ul>
 *
 * <h3>Key Persistence</h3>
 * <p>Set {@code keyDir} to a directory path to persist RSA keys to disk.
 * Keys are generated on first use and reused across all calls.
 * Set {@code keyDir} to {@code null} for ephemeral keys (in-memory only, lost on restart).</p>
 */
@Getter
public class SecureChatCompletion implements AutoCloseable {

    /** Underlying client that performs the secure requests; closed by {@link #close()}. */
    private final SecureCompletionClient client;

    /** Instance-level bearer token; may be {@code null}. */
    private final String apiKey;

    /** Directory for persisted RSA keys, or {@code null} for ephemeral keys. */
    private final String keyDir;

    /**
     * Constructs a {@code SecureChatCompletion} with default settings.
     *
     * <p>Uses {@code Constants.DEFAULT_BASE_URL} as the router URL, disallows plain HTTP,
     * sets no API key, enables secure memory, uses ephemeral keys ({@code keyDir == null})
     * and {@code Constants.DEFAULT_MAX_RETRIES} retries.</p>
     */
    public SecureChatCompletion() {
        this(Constants.DEFAULT_BASE_URL, false, null, true, null, Constants.DEFAULT_MAX_RETRIES);
    }

    /**
     * Constructs a {@code SecureChatCompletion} with the specified settings.
     *
     * @param baseUrl NOMYO Router base URL (HTTPS enforced unless {@code allowHttp} is {@code true})
     * @param allowHttp permit {@code http://} URLs (development only)
     * @param apiKey Bearer token for authentication (can also be passed per-call via
     *     {@link #create(String, List, Map)})
     * @param secureMemory enable memory locking/zeroing (warns if unavailable)
     * @param keyDir directory to persist RSA keys; {@code null} = ephemeral (in-memory only)
     * @param maxRetries retries on 429/500/502/503/504 + network errors (exponential backoff: 1s, 2s, 4s)
     */
    public SecureChatCompletion(
            String baseUrl,
            boolean allowHttp,
            String apiKey,
            boolean secureMemory,
            String keyDir,
            int maxRetries
    ) {
        this.client = new SecureCompletionClient(baseUrl, allowHttp, secureMemory, maxRetries);
        this.apiKey = apiKey;
        // NOTE(review): keyDir is only stored here; it is not passed to the client
        // constructor above - confirm where key persistence is meant to be wired in.
        this.keyDir = keyDir;
    }

    /**
     * Creates a chat completion with the specified parameters.
     *
     * <p>This is the main entrypoint, with the same signature as
     * {@code openai.ChatCompletion.create()}. It is meant to return a map (not an object)
     * containing the OpenAI-compatible response.</p>
     *
     * <p><b>Not yet implemented:</b> the current body performs none of the steps listed in
     * its comments and always returns {@code null}.</p>
     *
     * <h3>Parameters</h3>
     * <table>
     * <tr><th>Param</th><th>Type</th><th>Required</th><th>Description</th></tr>
     * <tr><td>{@code model}</td><td>{@code String}</td><td>yes</td><td>Model identifier, e.g. "Qwen/Qwen3-0.6B"</td></tr>
     * <tr><td>{@code messages}</td><td>{@code List<Map>}</td><td>yes</td><td>OpenAI-format messages</td></tr>
     * <tr><td>{@code temperature}</td><td>{@code Double}</td><td>no</td><td>Sampling temperature, 0 to 2</td></tr>
     * <tr><td>{@code maxTokens}</td><td>{@code Integer}</td><td>no</td><td>Maximum tokens in response</td></tr>
     * <tr><td>{@code topP}</td><td>{@code Double}</td><td>no</td><td>Top-p sampling parameter</td></tr>
     * <tr><td>{@code stop}</td><td>{@code String | List<String>}</td><td>no</td><td>Stop sequences</td></tr>
     * <tr><td>{@code presencePenalty}</td><td>{@code Double}</td><td>no</td><td>-2.0 to 2.0</td></tr>
     * <tr><td>{@code frequencyPenalty}</td><td>{@code Double}</td><td>no</td><td>-2.0 to 2.0</td></tr>
     * <tr><td>{@code n}</td><td>{@code Integer}</td><td>no</td><td>Number of completions</td></tr>
     * <tr><td>{@code bestOf}</td><td>{@code Integer}</td><td>no</td><td></td></tr>
     * <tr><td>{@code seed}</td><td>{@code Integer}</td><td>no</td><td>Reproducibility seed</td></tr>
     * <tr><td>{@code logitBias}</td><td>{@code Map<String, Double>}</td><td>no</td><td>Token bias map</td></tr>
     * <tr><td>{@code user}</td><td>{@code String}</td><td>no</td><td>End-user identifier</td></tr>
     * <tr><td>{@code tools}</td><td>{@code List<Map>}</td><td>no</td><td>Tool definitions passed through to llama.cpp</td></tr>
     * <tr><td>{@code toolChoice}</td><td>{@code String}</td><td>no</td><td>"auto", "none", or specific tool name</td></tr>
     * <tr><td>{@code responseFormat}</td><td>{@code Map}</td><td>no</td><td>{@code {"type": "json_object"}} or {@code {"type": "json_schema", ...}}</td></tr>
     * <tr><td>{@code stream}</td><td>{@code Boolean}</td><td>no</td><td><b>NOT supported.</b> Server rejects with HTTP 400. Always use {@code false}.</td></tr>
     * <tr><td>{@code baseUrl}</td><td>{@code String}</td><td>no</td><td>Per-call override (creates temp client internally)</td></tr>
     * <tr><td>{@code securityTier}</td><td>{@code String}</td><td>no</td><td>"standard", "high", or "maximum". Invalid values are meant to be rejected.</td></tr>
     * <tr><td>{@code apiKey}</td><td>{@code String}</td><td>no</td><td>Per-call override of instance {@code apiKey}.</td></tr>
     * </table>
     *
     * @param model model identifier (required)
     * @param messages OpenAI-format message list (required)
     * @param kwargs additional OpenAI-compatible parameters; may be {@code null}
     *     (the two-argument overload passes {@code null})
     * @return OpenAI-compatible response map (see §6.2 of reference docs);
     *     currently always {@code null}
     * @throws SecurityError if encryption/decryption fails
     * @throws APIConnectionError if a network error occurs
     * @throws InvalidRequestError if the API returns 400
     * @throws AuthenticationError if the API returns 401
     * @throws ForbiddenError if the API returns 403
     * @throws RateLimitError if the API returns 429
     * @throws ServerError if the API returns 500
     * @throws ServiceUnavailableError if the API returns 503
     * @throws APIError for other errors
     */
    public Map<String, Object> create(String model, List<Map<String, Object>> messages, Map<String, Object> kwargs) {
        // TODO: planned steps, none implemented yet:
        //   1. Build payload from model, messages, and kwargs (kwargs may be null)
        //   2. Validate stream is false
        //   3. Validate securityTier if provided
        //   4. Use per-call api_key override if provided, else instance apiKey
        //   5. Create temp client if baseUrl override provided
        //   6. Send secure request
        //   7. Return decrypted response map
        return null;
    }

    /**
     * Creates a chat completion with the specified model and messages.
     * Convenience variant that delegates to {@link #create(String, List, Map)}
     * with {@code null} as the additional-parameters map.
     *
     * @param model model identifier (required)
     * @param messages OpenAI-format message list (required)
     * @return OpenAI-compatible response map
     */
    public Map<String, Object> create(String model, List<Map<String, Object>> messages) {
        return create(model, messages, null);
    }

    /**
     * Alias for {@link #create(String, List, Map)} with identical behavior.
     *
     * <p>Despite the {@code a} prefix, no asynchronous execution happens here: the call
     * delegates directly to {@code create} and returns its result to the caller.</p>
     *
     * @param model model identifier (required)
     * @param messages OpenAI-format message list (required)
     * @param kwargs additional OpenAI-compatible parameters
     * @return OpenAI-compatible response map
     */
    public Map<String, Object> acreate(String model, List<Map<String, Object>> messages, Map<String, Object> kwargs) {
        return create(model, messages, kwargs);
    }

    /**
     * Alias for {@link #create(String, List)} with identical behavior.
     *
     * <p>Despite the {@code a} prefix, no asynchronous execution happens here: the call
     * delegates directly to {@code create} and returns its result to the caller.</p>
     *
     * @param model model identifier (required)
     * @param messages OpenAI-format message list (required)
     * @return OpenAI-compatible response map
     */
    public Map<String, Object> acreate(String model, List<Map<String, Object>> messages) {
        return create(model, messages);
    }

    /**
     * Closes the client and releases any resources.
     *
     * <p>Delegates to the underlying client's {@code close()}. Declared without checked
     * exceptions so the instance can be used in try-with-resources without a catch clause.</p>
     */
    @Override
    public void close() {
        client.close();
    }
}