nomyo4J/src/main/java/ai/nomyo/SecureChatCompletion.java

package ai.nomyo;

import ai.nomyo.errors.*;
import lombok.Getter;

import java.util.List;
import java.util.Map;

/**
 * OpenAI-style entry point for chat completions against the NOMYO secure API.
 *
 * <p>The public surface is modelled on {@code openai.ChatCompletion.create()}.
 * By design, every request is encrypted with hybrid AES-256-GCM + RSA-4096 before it is
 * sent to the NOMYO router; this class holds a {@link SecureCompletionClient} for that work.</p>
 *
 * <p><b>Implementation status:</b> {@link #create(String, List, Map)} is currently a stub
 * that returns {@code null}. The parameter and exception documentation below describes the
 * intended contract, not behavior that is implemented in this class yet.</p>
 *
 * <h3>Usage</h3>
 * <pre>{@code
 * try (SecureChatCompletion client = new SecureChatCompletion(
 *         "https://api.nomyo.ai",
 *         false,
 *         "your-api-key",
 *         true,
 *         "/path/to/keys",
 *         2)) {
 *     Map<String, Object> response = client.create(
 *         "Qwen/Qwen3-0.6B",
 *         List.of(Map.of("role", "user", "content", "Hello, world!"))
 *     );
 * }
 * }</pre>
 *
 * <h3>Streaming</h3>
 * <p>Streaming is <b>not supported</b>. The server rejects streaming requests with HTTP 400.
 * Always leave {@code stream} at {@code false} (the default).</p>
 *
 * <h3>Security Tiers</h3>
 * <p>The {@code securityTier} parameter selects the hardware isolation level:</p>
 * <ul>
 *   <li>{@code "standard"} — GPU inference (general secure inference)</li>
 *   <li>{@code "high"} — CPU/GPU (sensitive business data)</li>
 *   <li>{@code "maximum"} — CPU only (PHI, classified data)</li>
 * </ul>
 *
 * <h3>Key Persistence</h3>
 * <p>{@code keyDir} names a directory in which RSA keys are meant to be persisted so they can
 * be reused across calls; {@code null} means ephemeral keys (in-memory only, lost on restart).
 * NOTE(review): this class only stores {@code keyDir}; it is not forwarded to the client
 * constructor, so persistence depends on code that is not present here yet.</p>
 *
 * <h3>Generated accessors</h3>
 * <p>The class-level Lombok {@code @Getter} generates {@code getClient()}, {@code getApiKey()}
 * and {@code getKeyDir()}. {@code getApiKey()} returns the bearer token as-is, so callers
 * should take care not to log it.</p>
 *
 * <h3>Resource handling</h3>
 * <p>Instances own a {@link SecureCompletionClient}. Call {@link #close()} when done, or
 * declare the instance in a try-with-resources statement.</p>
 */
@Getter
public class SecureChatCompletion implements AutoCloseable {

    /** Underlying client; created once in the constructor and closed by {@link #close()}. */
    private final SecureCompletionClient client;

    /** Instance-level bearer token; may be {@code null} (the no-arg constructor passes {@code null}). */
    private final String apiKey;

    /** Directory for persisted RSA keys, or {@code null} for ephemeral keys. Stored only; see class notes. */
    private final String keyDir;

    /**
     * Constructs a {@code SecureChatCompletion} with default settings.
     *
     * <p>Delegates to the full constructor with {@code Constants.DEFAULT_BASE_URL},
     * {@code allowHttp = false}, no API key, {@code secureMemory = true}, no key directory
     * (ephemeral keys) and {@code Constants.DEFAULT_MAX_RETRIES} retries.</p>
     */
    public SecureChatCompletion() {
        this(Constants.DEFAULT_BASE_URL, false, null, true, null, Constants.DEFAULT_MAX_RETRIES);
    }

    /**
     * Constructs a {@code SecureChatCompletion} with the specified settings.
     *
     * <p>{@code baseUrl}, {@code allowHttp}, {@code secureMemory} and {@code maxRetries} are
     * handed to a new {@link SecureCompletionClient}; {@code apiKey} and {@code keyDir} are kept
     * on this instance. No argument validation happens in this constructor itself.</p>
     *
     * @param baseUrl       NOMYO Router base URL (HTTPS enforced unless {@code allowHttp} is {@code true})
     * @param allowHttp     permit {@code http://} URLs (development only)
     * @param apiKey        Bearer token for authentication (can also be passed per-call via {@link #create})
     * @param secureMemory  enable memory locking/zeroing (warns if unavailable)
     * @param keyDir        directory to persist RSA keys; {@code null} = ephemeral (in-memory only)
     * @param maxRetries    retries on 429/500/502/503/504 + network errors (exponential backoff: 1s, 2s, 4s…)
     */
    public SecureChatCompletion(
            String baseUrl,
            boolean allowHttp,
            String apiKey,
            boolean secureMemory,
            String keyDir,
            int maxRetries
    ) {
        this.client = new SecureCompletionClient(baseUrl, allowHttp, secureMemory, maxRetries);
        this.apiKey = apiKey;
        // NOTE(review): keyDir is not passed to SecureCompletionClient above — confirm where
        // key persistence is supposed to be wired in.
        this.keyDir = keyDir;
    }

    /**
     * Creates a chat completion with the specified parameters.
     *
     * <p>This is the main entry point, shaped after {@code openai.ChatCompletion.create()}.
     * It is meant to return a map (not a typed object) holding the OpenAI-compatible response.</p>
     *
     * <p><b>Not implemented yet:</b> the body is a placeholder that ignores its arguments and
     * returns {@code null}. Everything below documents the intended contract.</p>
     *
     * <h3>Parameters</h3>
     * <table>
     *   <tr><th>Param</th><th>Type</th><th>Required</th><th>Description</th></tr>
     *   <tr><td>{@code model}</td><td>{@code String}</td><td>yes</td><td>Model identifier, e.g. "Qwen/Qwen3-0.6B"</td></tr>
     *   <tr><td>{@code messages}</td><td>{@code List<Map>}</td><td>yes</td><td>OpenAI-format messages</td></tr>
     *   <tr><td>{@code temperature}</td><td>{@code Double}</td><td>no</td><td>0–2</td></tr>
     *   <tr><td>{@code maxTokens}</td><td>{@code Integer}</td><td>no</td><td>Maximum tokens in response</td></tr>
     *   <tr><td>{@code topP}</td><td>{@code Double}</td><td>no</td><td>Top-p sampling parameter</td></tr>
     *   <tr><td>{@code stop}</td><td>{@code String | List<String>}</td><td>no</td><td>Stop sequences</td></tr>
     *   <tr><td>{@code presencePenalty}</td><td>{@code Double}</td><td>no</td><td>-2.0 to 2.0</td></tr>
     *   <tr><td>{@code frequencyPenalty}</td><td>{@code Double}</td><td>no</td><td>-2.0 to 2.0</td></tr>
     *   <tr><td>{@code n}</td><td>{@code Integer}</td><td>no</td><td>Number of completions</td></tr>
     *   <tr><td>{@code bestOf}</td><td>{@code Integer}</td><td>no</td><td></td></tr>
     *   <tr><td>{@code seed}</td><td>{@code Integer}</td><td>no</td><td>Reproducibility seed</td></tr>
     *   <tr><td>{@code logitBias}</td><td>{@code Map<String, Double>}</td><td>no</td><td>Token bias map</td></tr>
     *   <tr><td>{@code user}</td><td>{@code String}</td><td>no</td><td>End-user identifier</td></tr>
     *   <tr><td>{@code tools}</td><td>{@code List<Map>}</td><td>no</td><td>Tool definitions passed through to llama.cpp</td></tr>
     *   <tr><td>{@code toolChoice}</td><td>{@code String}</td><td>no</td><td>"auto", "none", or specific tool name</td></tr>
     *   <tr><td>{@code responseFormat}</td><td>{@code Map}</td><td>no</td><td>{"type": "json_object"} or {"type": "json_schema", ...}</td></tr>
     *   <tr><td>{@code stream}</td><td>{@code Boolean}</td><td>no</td><td><b>NOT supported.</b> Server rejects with HTTP 400. Always use {@code false}.</td></tr>
     *   <tr><td>{@code baseUrl}</td><td>{@code String}</td><td>no</td><td>Per-call override (creates temp client internally)</td></tr>
     *   <tr><td>{@code securityTier}</td><td>{@code String}</td><td>no</td><td>"standard", "high", or "maximum". Invalid values must be rejected; the earlier note named {@code ValueError}, which is not a Java type — TODO decide the exception to throw.</td></tr>
     *   <tr><td>{@code apiKey}</td><td>{@code String}</td><td>no</td><td>Per-call override of instance {@code apiKey}.</td></tr>
     * </table>
     *
     * @param model     model identifier (required)
     * @param messages  OpenAI-format message list (required)
     * @param kwargs    additional OpenAI-compatible parameters; may be {@code null}
     *                  (the two-argument overload passes {@code null})
     * @return intended: OpenAI-compatible response map (see §6.2 of reference docs);
     *         currently always {@code null}
     * @throws SecurityError       if encryption/decryption fails
     * @throws APIConnectionError  if a network error occurs
     * @throws InvalidRequestError if the API returns 400
     * @throws AuthenticationError if the API returns 401
     * @throws ForbiddenError      if the API returns 403
     * @throws RateLimitError      if the API returns 429
     * @throws ServerError         if the API returns 500
     * @throws ServiceUnavailableError if the API returns 503
     * @throws APIError            for other errors
     */
    public Map<String, Object> create(String model, List<Map<String, Object>> messages, Map<String, Object> kwargs) {
        // TODO: implementation plan (none of these steps exist yet):
        //   1. Build payload from model, messages, and kwargs (kwargs may be null)
        //   2. Validate stream is false
        //   3. Validate securityTier if provided
        //   4. Use per-call api_key override if provided, else instance apiKey
        //   5. Create temp client if baseUrl override provided
        //   6. Send secure request
        //   7. Return decrypted response map
        return null;
    }

    /**
     * Creates a chat completion with the specified model and messages.
     * Convenience overload that forwards to {@link #create(String, List, Map)} with
     * {@code null} as the extra-parameter map.
     *
     * @param model     model identifier (required)
     * @param messages  OpenAI-format message list (required)
     * @return whatever {@link #create(String, List, Map)} returns (intended: OpenAI-compatible response map)
     */
    public Map<String, Object> create(String model, List<Map<String, Object>> messages) {
        return create(model, messages, null);
    }

    /**
     * Alias for {@link #create(String, List, Map)}.
     *
     * <p>Despite the {@code a} prefix, nothing here is asynchronous: the call is forwarded
     * directly to {@code create} on the calling thread and its result is returned unchanged.</p>
     *
     * @param model     model identifier (required)
     * @param messages  OpenAI-format message list (required)
     * @param kwargs    additional OpenAI-compatible parameters
     * @return whatever {@link #create(String, List, Map)} returns
     */
    public Map<String, Object> acreate(String model, List<Map<String, Object>> messages, Map<String, Object> kwargs) {
        return create(model, messages, kwargs);
    }

    /**
     * Alias for {@link #create(String, List)}.
     *
     * <p>Despite the {@code a} prefix, nothing here is asynchronous: the call is forwarded
     * directly to {@code create} on the calling thread and its result is returned unchanged.</p>
     *
     * @param model     model identifier (required)
     * @param messages  OpenAI-format message list (required)
     * @return whatever {@link #create(String, List)} returns
     */
    public Map<String, Object> acreate(String model, List<Map<String, Object>> messages) {
        return create(model, messages);
    }

    /**
     * Closes the underlying {@link SecureCompletionClient}.
     *
     * <p>Implements {@link AutoCloseable#close()} so instances can be managed with
     * try-with-resources. Declares no checked exceptions.</p>
     */
    @Override
    public void close() {
        client.close();
    }
}