Initial commit
This commit is contained in:
commit
8acf584d28
24 changed files with 2408 additions and 0 deletions
188
src/main/java/ai/nomyo/SecureChatCompletion.java
Normal file
188
src/main/java/ai/nomyo/SecureChatCompletion.java
Normal file
|
|
@ -0,0 +1,188 @@
|
|||
package ai.nomyo;
|
||||
|
||||
import ai.nomyo.errors.*;
|
||||
import lombok.Getter;
|
||||
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
|
||||
/**
|
||||
* High-level OpenAI-compatible entrypoint for the NOMYO secure API.
|
||||
*
|
||||
* <p>This class provides a familiar API surface matching {@code openai.ChatCompletion.create()}.
|
||||
* All requests are automatically encrypted using hybrid AES-256-GCM + RSA-4096 encryption
|
||||
* before being sent to the NOMYO router.</p>
|
||||
*
|
||||
* <h3>Usage</h3>
|
||||
* <pre>{@code
|
||||
* SecureChatCompletion client = new SecureChatCompletion(
|
||||
* "https://api.nomyo.ai",
|
||||
* false,
|
||||
* "your-api-key",
|
||||
* true,
|
||||
* "/path/to/keys",
|
||||
* 2
|
||||
* );
|
||||
*
|
||||
* Map<String, Object> response = client.create(
|
||||
* "Qwen/Qwen3-0.6B",
|
||||
* List.of(Map.of("role", "user", "content", "Hello, world!"))
|
||||
* );
|
||||
* }</pre>
|
||||
*
|
||||
* <h3>Streaming</h3>
|
||||
* <p>Streaming is <b>not supported</b>. The server rejects streaming requests with HTTP 400.
|
||||
* Always use {@code stream=false} (the default).</p>
|
||||
*
|
||||
* <h3>Security Tiers</h3>
|
||||
* <p>The {@code security_tier} parameter controls the hardware isolation level:</p>
|
||||
* <ul>
|
||||
* <li>{@code "standard"} — GPU inference (general secure inference)</li>
|
||||
* <li>{@code "high"} — CPU/GPU (sensitive business data)</li>
|
||||
* <li>{@code "maximum"} — CPU only (PHI, classified data)</li>
|
||||
* </ul>
|
||||
*
|
||||
* <h3>Key Persistence</h3>
|
||||
* <p>Set {@code keyDir} to a directory path to persist RSA keys to disk.
|
||||
* Keys are generated on first use and reused across all calls.
|
||||
* Set {@code keyDir} to {@code null} for ephemeral keys (in-memory only, lost on restart).</p>
|
||||
*/
|
||||
@Getter
|
||||
public class SecureChatCompletion {
|
||||
|
||||
private final SecureCompletionClient client;
|
||||
private final String apiKey;
|
||||
private final String keyDir;
|
||||
|
||||
/**
|
||||
* Constructs a {@code SecureChatCompletion} with default settings.
|
||||
*
|
||||
* <p>Uses the default NOMYO router URL ({@code https://api.nomyo.ai}),
|
||||
* HTTPS-only, secure memory enabled, ephemeral keys, and 2 retries.</p>
|
||||
*/
|
||||
public SecureChatCompletion() {
|
||||
this(Constants.DEFAULT_BASE_URL, false, null, true, null, Constants.DEFAULT_MAX_RETRIES);
|
||||
}
|
||||
|
||||
/**
|
||||
* Constructs a {@code SecureChatCompletion} with the specified settings.
|
||||
*
|
||||
* @param baseUrl NOMYO Router base URL (HTTPS enforced unless {@code allowHttp} is {@code true})
|
||||
* @param allowHttp permit {@code http://} URLs (development only)
|
||||
* @param apiKey Bearer token for authentication (can also be passed per-call via {@link #create})
|
||||
* @param secureMemory enable memory locking/zeroing (warns if unavailable)
|
||||
* @param keyDir directory to persist RSA keys; {@code null} = ephemeral (in-memory only)
|
||||
* @param maxRetries retries on 429/500/502/503/504 + network errors (exponential backoff: 1s, 2s, 4s…)
|
||||
*/
|
||||
public SecureChatCompletion(
|
||||
String baseUrl,
|
||||
boolean allowHttp,
|
||||
String apiKey,
|
||||
boolean secureMemory,
|
||||
String keyDir,
|
||||
int maxRetries
|
||||
) {
|
||||
this.client = new SecureCompletionClient(baseUrl, allowHttp, secureMemory, maxRetries);
|
||||
this.apiKey = apiKey;
|
||||
this.keyDir = keyDir;
|
||||
}
|
||||
|
||||
/**
|
||||
* Creates a chat completion with the specified parameters.
|
||||
*
|
||||
* <p>This is the main entrypoint, with the same signature as
|
||||
* {@code openai.ChatCompletion.create()}. Returns a map (not an object)
|
||||
* containing the OpenAI-compatible response.</p>
|
||||
*
|
||||
* <h3>Parameters</h3>
|
||||
* <table>
|
||||
* <tr><th>Param</th><th>Type</th><th>Required</th><th>Description</th></tr>
|
||||
* <tr><td>{@code model}</td><td>{@code String}</td><td>yes</td><td>Model identifier, e.g. "Qwen/Qwen3-0.6B"</td></tr>
|
||||
* <tr><td>{@code messages}</td><td>{@code List<Map>}</td><td>yes</td><td>OpenAI-format messages</td></tr>
|
||||
* <tr><td>{@code temperature}</td><td>{@code Double}</td><td>no</td><td>0–2</td></tr>
|
||||
* <tr><td>{@code maxTokens}</td><td>{@code Integer}</td><td>no</td><td>Maximum tokens in response</td></tr>
|
||||
* <tr><td>{@code topP}</td><td>{@code Double}</td><td>no</td><td>Top-p sampling parameter</td></tr>
|
||||
* <tr><td>{@code stop}</td><td>{@code String | List<String>}</td><td>no</td><td>Stop sequences</td></tr>
|
||||
* <tr><td>{@code presencePenalty}</td><td>{@code Double}</td><td>no</td><td>-2.0 to 2.0</td></tr>
|
||||
* <tr><td>{@code frequencyPenalty}</td><td>{@code Double}</td><td>no</td><td>-2.0 to 2.0</td></tr>
|
||||
* <tr><td>{@code n}</td><td>{@code Integer}</td><td>no</td><td>Number of completions</td></tr>
|
||||
* <tr><td>{@code bestOf}</td><td>{@code Integer}</td><td>no</td><td></td></tr>
|
||||
* <tr><td>{@code seed}</td><td>{@code Integer}</td><td>no</td><td>Reproducibility seed</td></tr>
|
||||
* <tr><td>{@code logitBias}</td><td>{@code Map<String, Double>}</td><td>no</td><td>Token bias map</td></tr>
|
||||
* <tr><td>{@code user}</td><td>{@code String}</td><td>no</td><td>End-user identifier</td></tr>
|
||||
* <tr><td>{@code tools}</td><td>{@code List<Map>}</td><td>no</td><td>Tool definitions passed through to llama.cpp</td></tr>
|
||||
* <tr><td>{@code toolChoice}</td><td>{@code String}</td><td>no</td><td>"auto", "none", or specific tool name</td></tr>
|
||||
* <tr><td>{@code responseFormat}</td><td>{@code Map}</td><td>no</td><td>{"type": "json_object"} or {"type": "json_schema", ...}</td></tr>
|
||||
* <tr><td>{@code stream}</td><td>{@code Boolean}</td><td>no</td><td><b>NOT supported.</b> Server rejects with HTTP 400. Always use {@code false}.</td></tr>
|
||||
* <tr><td>{@code baseUrl}</td><td>{@code String}</td><td>no</td><td>Per-call override (creates temp client internally)</td></tr>
|
||||
* <tr><td>{@code securityTier}</td><td>{@code String}</td><td>no</td><td>"standard", "high", or "maximum". Invalid values raise {@code ValueError}.</td></tr>
|
||||
* <tr><td>{@code apiKey}</td><td>{@code String}</td><td>no</td><td>Per-call override of instance {@code apiKey}.</td></tr>
|
||||
* </table>
|
||||
*
|
||||
* @param model model identifier (required)
|
||||
* @param messages OpenAI-format message list (required)
|
||||
* @param kwargs additional OpenAI-compatible parameters
|
||||
* @return OpenAI-compatible response map (see §6.2 of reference docs)
|
||||
* @throws SecurityError if encryption/decryption fails
|
||||
* @throws APIConnectionError if a network error occurs
|
||||
* @throws InvalidRequestError if the API returns 400
|
||||
* @throws AuthenticationError if the API returns 401
|
||||
* @throws ForbiddenError if the API returns 403
|
||||
* @throws RateLimitError if the API returns 429
|
||||
* @throws ServerError if the API returns 500
|
||||
* @throws ServiceUnavailableError if the API returns 503
|
||||
* @throws APIError for other errors
|
||||
*/
|
||||
public Map<String, Object> create(String model, List<Map<String, Object>> messages, Map<String, Object> kwargs) {
|
||||
// Build payload from model, messages, and kwargs
|
||||
// Validate stream is false
|
||||
// Validate securityTier if provided
|
||||
// Use per-call api_key override if provided, else instance apiKey
|
||||
// Create temp client if baseUrl override provided
|
||||
// Send secure request
|
||||
// Return decrypted response map
|
||||
return null;
|
||||
}
|
||||
|
||||
/**
|
||||
* Creates a chat completion with the specified model and messages.
|
||||
* Convenience variant with no additional parameters.
|
||||
*
|
||||
* @param model model identifier (required)
|
||||
* @param messages OpenAI-format message list (required)
|
||||
* @return OpenAI-compatible response map
|
||||
*/
|
||||
public Map<String, Object> create(String model, List<Map<String, Object>> messages) {
|
||||
return create(model, messages, null);
|
||||
}
|
||||
|
||||
/**
|
||||
* Async alias for {@link #create(String, List, Map)}. Identical behavior.
|
||||
*
|
||||
* @param model model identifier (required)
|
||||
* @param messages OpenAI-format message list (required)
|
||||
* @param kwargs additional OpenAI-compatible parameters
|
||||
* @return OpenAI-compatible response map
|
||||
*/
|
||||
public Map<String, Object> acreate(String model, List<Map<String, Object>> messages, Map<String, Object> kwargs) {
|
||||
return create(model, messages, kwargs);
|
||||
}
|
||||
|
||||
/**
|
||||
* Async alias for {@link #create(String, List)}. Identical behavior.
|
||||
*
|
||||
* @param model model identifier (required)
|
||||
* @param messages OpenAI-format message list (required)
|
||||
* @return OpenAI-compatible response map
|
||||
*/
|
||||
public Map<String, Object> acreate(String model, List<Map<String, Object>> messages) {
|
||||
return create(model, messages);
|
||||
}
|
||||
|
||||
/**
|
||||
* Closes the client and releases any resources.
|
||||
*/
|
||||
public void close() {
|
||||
client.close();
|
||||
}
|
||||
}
|
||||
Loading…
Add table
Add a link
Reference in a new issue