From 78dc4edad97c8708c3a69941092c105b2f366fa1 Mon Sep 17 00:00:00 2001 From: Musa Date: Thu, 23 Apr 2026 15:34:44 -0700 Subject: [PATCH] Add first-class ChatGPT subscription provider support (#881) * Add first-class ChatGPT subscription provider support * Address PR feedback: move uuid import to top, reuse parsed config in up() * Add ChatGPT token watchdog for seamless long-lived sessions * Address PR feedback: error on stream=false for ChatGPT, fix auth file permissions * Replace ChatGPT watchdog/restart with passthrough_auth --------- Co-authored-by: Musa Malik --- cli/planoai/chatgpt_auth.py | 290 ++++++++++++++++++ cli/planoai/chatgpt_cmd.py | 86 ++++++ cli/planoai/config_generator.py | 25 ++ cli/planoai/main.py | 32 ++ cli/planoai/native_runner.py | 15 +- config/plano_config_schema.yaml | 12 + crates/brightstaff/src/handlers/llm/mod.rs | 10 +- crates/common/src/configuration.rs | 5 + crates/common/src/llm_providers.rs | 1 + crates/hermesllm/src/bin/provider_models.yaml | 10 +- crates/hermesllm/src/clients/endpoints.rs | 4 +- crates/hermesllm/src/providers/id.rs | 10 +- crates/hermesllm/src/providers/request.rs | 64 +++- crates/llm_gateway/src/stream_context.rs | 23 +- .../chatgpt_subscription/README.md | 61 ++++ .../llm_routing/chatgpt_subscription/chat.py | 38 +++ .../chatgpt_subscription/config.yaml | 9 + .../chatgpt_subscription/test_chatgpt.sh | 18 ++ 18 files changed, 693 insertions(+), 20 deletions(-) create mode 100644 cli/planoai/chatgpt_auth.py create mode 100644 cli/planoai/chatgpt_cmd.py create mode 100644 demos/llm_routing/chatgpt_subscription/README.md create mode 100644 demos/llm_routing/chatgpt_subscription/chat.py create mode 100644 demos/llm_routing/chatgpt_subscription/config.yaml create mode 100755 demos/llm_routing/chatgpt_subscription/test_chatgpt.sh diff --git a/cli/planoai/chatgpt_auth.py b/cli/planoai/chatgpt_auth.py new file mode 100644 index 00000000..dbbde3ac --- /dev/null +++ b/cli/planoai/chatgpt_auth.py @@ -0,0 +1,290 @@ +""" 
def _decode_jwt_claims(token: str) -> Dict[str, Any]:
    """Decode the payload segment of a JWT without verifying its signature.

    The token is split on ".", the second segment is re-padded to a multiple
    of four characters, base64url-decoded and parsed as JSON.

    Args:
        token: Compact JWT string ("header.payload.signature").

    Returns:
        The claims mapping, or an empty dict when the token has fewer than two
        segments, cannot be decoded/parsed, or its payload is not a JSON object.
    """
    try:
        parts = token.split(".")
        if len(parts) < 2:
            return {}
        payload_b64 = parts[1]
        # JWT segments omit base64 padding; restore it before decoding.
        payload_b64 += "=" * (-len(payload_b64) % 4)
        claims = json.loads(base64.urlsafe_b64decode(payload_b64).decode("utf-8"))
    except Exception:
        # Deliberately best-effort: callers treat "no claims" as "unknown",
        # so any decoding problem collapses to an empty mapping.
        return {}
    # Valid JSON is not necessarily an object; callers call .get() on the
    # result, so never hand back a list/str/number.
    return claims if isinstance(claims, dict) else {}
def request_device_code(timeout: float = 30.0) -> Dict[str, str]:
    """Request a device code from OpenAI's device auth endpoint.

    Args:
        timeout: Seconds to wait for the HTTP request before giving up.
            Without a timeout ``requests`` can block indefinitely on a
            stalled connection.

    Returns:
        A dict with ``device_auth_id``, ``user_code`` and ``interval`` (the
        poll interval as a string, ``"5"`` when the server sends none).

    Raises:
        requests.HTTPError: If the endpoint answers with an error status.
        RuntimeError: If the response lacks ``device_auth_id`` or a user code.
    """
    resp = requests.post(
        CHATGPT_DEVICE_CODE_URL,
        json={"client_id": CHATGPT_CLIENT_ID},
        timeout=timeout,
    )
    resp.raise_for_status()
    data = resp.json()

    device_auth_id = data.get("device_auth_id")
    # The code is accepted under either spelling of the key.
    user_code = data.get("user_code") or data.get("usercode")
    interval = data.get("interval")
    if not device_auth_id or not user_code:
        raise RuntimeError(f"Device code response missing fields: {data}")

    return {
        "device_auth_id": device_auth_id,
        "user_code": user_code,
        "interval": str(interval or "5"),
    }
def exchange_code_for_tokens(
    code_data: Dict[str, str], timeout: float = 30.0
) -> Dict[str, str]:
    """Exchange the authorization code for access/refresh/id tokens.

    Args:
        code_data: Mapping that provides ``authorization_code`` and
            ``code_verifier``.
        timeout: Seconds to wait for the HTTP request before giving up.

    Returns:
        A dict with exactly ``access_token``, ``refresh_token`` and
        ``id_token``.

    Raises:
        KeyError: If ``code_data`` lacks a required key.
        requests.HTTPError: If the token endpoint answers with an error status.
        RuntimeError: If the response is missing one of the three tokens.
    """
    # Pass the fields as a mapping so requests percent-encodes each value;
    # hand-concatenating them corrupts codes/verifiers that contain
    # characters such as "+", "/", "=" or "&".
    form = {
        "grant_type": "authorization_code",
        "code": code_data["authorization_code"],
        "redirect_uri": f"{CHATGPT_AUTH_BASE}/deviceauth/callback",
        "client_id": CHATGPT_CLIENT_ID,
        "code_verifier": code_data["code_verifier"],
    }
    resp = requests.post(
        CHATGPT_OAUTH_TOKEN_URL,
        headers={"Content-Type": "application/x-www-form-urlencoded"},
        data=form,
        timeout=timeout,
    )
    resp.raise_for_status()
    data = resp.json()

    required = ("access_token", "refresh_token", "id_token")
    missing = [key for key in required if key not in data]
    if missing:
        # Name the absent fields only: the payload may hold live tokens and
        # this message is shown to the user.
        raise RuntimeError(
            f"Token exchange response missing fields: {', '.join(missing)}"
        )

    return {key: data[key] for key in required}
def get_access_token() -> Tuple[str, Optional[str]]:
    """Return a usable access token together with the account ID.

    The stored token is returned as-is while it is still valid. Otherwise the
    stored refresh token is exchanged for new tokens, which are persisted
    before being returned.

    Returns:
        ``(access_token, account_id)``; ``account_id`` may be ``None``.

    Raises:
        RuntimeError: If nothing is stored, or the token is no longer valid
            and no refresh token is available.
    """
    stored = load_auth()
    if not stored:
        raise RuntimeError(
            "No ChatGPT credentials found. Run 'planoai chatgpt login' first."
        )

    current_token = stored.get("access_token")
    # Short-circuit keeps the expiry check from running without a token.
    still_valid = bool(current_token) and not _is_token_expired(stored)
    if still_valid:
        return current_token, stored.get("account_id")

    refresh_token = stored.get("refresh_token")
    if not refresh_token:
        raise RuntimeError(
            "ChatGPT token expired and refresh failed. Run 'planoai chatgpt login' again."
        )

    renewed = _build_auth_record(_refresh_tokens(refresh_token))
    save_auth(renewed)
    return renewed["access_token"], renewed.get("account_id")
# Prints one of four states: not authenticated, authenticated with time left,
# token expired, or authenticated without expiry information.
# The docstring below is kept short on purpose: click shows it as help text.
@chatgpt.command()
def status():
    """Check ChatGPT authentication status."""
    auth_data = chatgpt_auth.load_auth()
    if not auth_data or not auth_data.get("access_token"):
        console.print(
            "[yellow]Not authenticated.[/yellow] Run 'planoai chatgpt login'."
        )
        return

    # The stored record can carry an explicit null account_id, which a
    # .get() default would not replace; `or` covers both missing and null.
    account_id = auth_data.get("account_id") or "unknown"
    expires_at = auth_data.get("expires_at")

    if expires_at:
        expiry_time = datetime.datetime.fromtimestamp(
            expires_at, tz=datetime.timezone.utc
        )
        now = datetime.datetime.now(tz=datetime.timezone.utc)
        if expiry_time > now:
            remaining = expiry_time - now
            # timedelta.seconds excludes whole days; total_seconds() does not,
            # so tokens valid for more than 24h report the right figure.
            minutes_left = int(remaining.total_seconds()) // 60
            console.print(
                f"[green]Authenticated[/green]"
                f"\n  Account ID: {account_id}"
                f"\n  Token expires: {expiry_time.strftime('%Y-%m-%d %H:%M:%S UTC')}"
                f" ({minutes_left}m remaining)"
            )
        else:
            console.print(
                f"[yellow]Token expired[/yellow]"
                f"\n  Account ID: {account_id}"
                f"\n  Expired at: {expiry_time.strftime('%Y-%m-%d %H:%M:%S UTC')}"
                f"\n  Will auto-refresh on next use, or run 'planoai chatgpt login'."
            )
    else:
        console.print(
            f"[green]Authenticated[/green] (no expiry info)"
            f"\n  Account ID: {account_id}"
        )
def _inject_chatgpt_tokens_if_needed(config, env, console):
    """If config uses chatgpt providers, resolve tokens from ~/.plano/chatgpt/auth.json.

    Args:
        config: Parsed Plano config. ``None`` (what ``yaml.safe_load`` yields
            for an empty file) or any non-mapping is treated as "no providers".
        env: Mutable environment mapping; receives ``CHATGPT_ACCESS_TOKEN``
            and, when known, ``CHATGPT_ACCOUNT_ID``.
        console: Object with a ``print`` method used to report auth errors.

    Exits the process with status 1 when a chatgpt provider is configured but
    a token cannot be obtained.
    """
    if not isinstance(config, dict):
        return

    providers = config.get("model_providers") or config.get("llm_providers") or []
    if not isinstance(providers, (list, tuple)):
        return

    # Skip entries that are not mappings instead of failing on `.get`.
    has_chatgpt = any(
        isinstance(p, dict) and str(p.get("model", "")).startswith("chatgpt/")
        for p in providers
    )
    if not has_chatgpt:
        return

    try:
        # Imported lazily so the auth module is only loaded when needed.
        from planoai.chatgpt_auth import get_access_token

        access_token, account_id = get_access_token()
        env["CHATGPT_ACCESS_TOKEN"] = access_token
        if account_id:
            env["CHATGPT_ACCOUNT_ID"] = account_id
    except Exception as e:
        console.print(
            f"\n[red]ChatGPT auth error:[/red] {e}\n"
            "[dim]Run 'planoai chatgpt login' to authenticate.[/dim]\n"
        )
        sys.exit(1)
main.add_command(cli_agent) main.add_command(generate_prompt_targets) main.add_command(init_cmd, name="init") main.add_command(trace_cmd, name="trace") +main.add_command(chatgpt_cmd, name="chatgpt") main.add_command(obs_cmd, name="obs") if __name__ == "__main__": diff --git a/cli/planoai/native_runner.py b/cli/planoai/native_runner.py index bbbbfd3e..1b58b36d 100644 --- a/cli/planoai/native_runner.py +++ b/cli/planoai/native_runner.py @@ -253,6 +253,7 @@ def start_native( log.info("Plano is running (native mode)") for port in gateway_ports: log.info(f" http://localhost:{port}") + break # Check if processes are still alive @@ -367,8 +368,11 @@ def _kill_pid(pid): pass -def stop_native(): - """Stop natively-running Envoy and brightstaff processes. +def stop_native(skip_pids: set | None = None): + """Stop natively-running Envoy, brightstaff, and watchdog processes. + + Args: + skip_pids: Set of PIDs to skip (used by the watchdog to avoid self-termination). Returns: bool: True if at least one process was running and received a stop signal, @@ -385,7 +389,12 @@ def stop_native(): brightstaff_pid = pids.get("brightstaff_pid") had_running_process = False - for name, pid in [("envoy", envoy_pid), ("brightstaff", brightstaff_pid)]: + for name, pid in [ + ("envoy", envoy_pid), + ("brightstaff", brightstaff_pid), + ]: + if skip_pids and pid in skip_pids: + continue if pid is None: continue try: diff --git a/config/plano_config_schema.yaml b/config/plano_config_schema.yaml index 22515ccf..6cf902c1 100644 --- a/config/plano_config_schema.yaml +++ b/config/plano_config_schema.yaml @@ -190,7 +190,13 @@ properties: - openai - xiaomi - gemini + - chatgpt - digitalocean + headers: + type: object + additionalProperties: + type: string + description: "Additional headers to send with upstream requests (e.g., ChatGPT-Account-Id, originator)." 
routing_preferences: type: array items: @@ -239,7 +245,13 @@ properties: - openai - xiaomi - gemini + - chatgpt - digitalocean + headers: + type: object + additionalProperties: + type: string + description: "Additional headers to send with upstream requests (e.g., ChatGPT-Account-Id, originator)." routing_preferences: type: array items: diff --git a/crates/brightstaff/src/handlers/llm/mod.rs b/crates/brightstaff/src/handlers/llm/mod.rs index 401cd0c5..3336209f 100644 --- a/crates/brightstaff/src/handlers/llm/mod.rs +++ b/crates/brightstaff/src/handlers/llm/mod.rs @@ -255,7 +255,15 @@ async fn llm_chat_inner( if let Some(ref client_api_kind) = client_api { let upstream_api = provider_id.compatible_api_for_client(client_api_kind, is_streaming_request); - client_request.normalize_for_upstream(provider_id, &upstream_api); + if let Err(e) = client_request.normalize_for_upstream(provider_id, &upstream_api) { + warn!( + "request_id={}: normalize_for_upstream failed: {}", + request_id, e + ); + let mut bad_request = Response::new(full(e.message)); + *bad_request.status_mut() = StatusCode::BAD_REQUEST; + return Ok(bad_request); + } } // --- Phase 2: Resolve conversation state (v1/responses API) --- diff --git a/crates/common/src/configuration.rs b/crates/common/src/configuration.rs index acf47e43..86aa331d 100644 --- a/crates/common/src/configuration.rs +++ b/crates/common/src/configuration.rs @@ -392,6 +392,8 @@ pub enum LlmProviderType { AmazonBedrock, #[serde(rename = "plano")] Plano, + #[serde(rename = "chatgpt")] + ChatGPT, #[serde(rename = "digitalocean")] DigitalOcean, } @@ -415,6 +417,7 @@ impl Display for LlmProviderType { LlmProviderType::Qwen => write!(f, "qwen"), LlmProviderType::AmazonBedrock => write!(f, "amazon_bedrock"), LlmProviderType::Plano => write!(f, "plano"), + LlmProviderType::ChatGPT => write!(f, "chatgpt"), LlmProviderType::DigitalOcean => write!(f, "digitalocean"), } } @@ -482,6 +485,7 @@ pub struct LlmProvider { pub base_url_path_prefix: Option, 
pub internal: Option, pub passthrough_auth: Option, + pub headers: Option>, } pub trait IntoModels { @@ -525,6 +529,7 @@ impl Default for LlmProvider { base_url_path_prefix: None, internal: None, passthrough_auth: None, + headers: None, } } } diff --git a/crates/common/src/llm_providers.rs b/crates/common/src/llm_providers.rs index b5c03b30..b4355a2f 100644 --- a/crates/common/src/llm_providers.rs +++ b/crates/common/src/llm_providers.rs @@ -277,6 +277,7 @@ mod tests { internal: None, stream: None, passthrough_auth: None, + headers: None, } } diff --git a/crates/hermesllm/src/bin/provider_models.yaml b/crates/hermesllm/src/bin/provider_models.yaml index d07e265d..2e9e0a9b 100644 --- a/crates/hermesllm/src/bin/provider_models.yaml +++ b/crates/hermesllm/src/bin/provider_models.yaml @@ -329,6 +329,10 @@ providers: - xiaomi/mimo-v2-flash - xiaomi/mimo-v2-omni - xiaomi/mimo-v2-pro + chatgpt: + - chatgpt/gpt-5.4 + - chatgpt/gpt-5.3-codex + - chatgpt/gpt-5.2 digitalocean: - digitalocean/openai-gpt-4.1 - digitalocean/openai-gpt-4o @@ -376,6 +380,6 @@ providers: - digitalocean/qwen3-embedding-0.6b - digitalocean/router:software-engineering metadata: - total_providers: 12 - total_models: 361 - last_updated: 2026-04-16T00:00:00.000000+00:00 + total_providers: 13 + total_models: 364 + last_updated: 2026-04-20T00:00:00.000000+00:00 diff --git a/crates/hermesllm/src/clients/endpoints.rs b/crates/hermesllm/src/clients/endpoints.rs index 39b34358..c2007844 100644 --- a/crates/hermesllm/src/clients/endpoints.rs +++ b/crates/hermesllm/src/clients/endpoints.rs @@ -192,7 +192,9 @@ impl SupportedAPIsFromClient { // For Responses API, check if provider supports it, otherwise translate to chat/completions match provider_id { // Providers that support /v1/responses natively - ProviderId::OpenAI | ProviderId::XAI => route_by_provider("/responses"), + ProviderId::OpenAI | ProviderId::XAI | ProviderId::ChatGPT => { + route_by_provider("/responses") + } // All other providers: translate to 
/chat/completions _ => route_by_provider("/chat/completions"), } diff --git a/crates/hermesllm/src/providers/id.rs b/crates/hermesllm/src/providers/id.rs index ee0fcff3..9e279524 100644 --- a/crates/hermesllm/src/providers/id.rs +++ b/crates/hermesllm/src/providers/id.rs @@ -44,6 +44,7 @@ pub enum ProviderId { Zhipu, Qwen, AmazonBedrock, + ChatGPT, DigitalOcean, } @@ -72,6 +73,7 @@ impl TryFrom<&str> for ProviderId { "qwen" => Ok(ProviderId::Qwen), "amazon_bedrock" => Ok(ProviderId::AmazonBedrock), "amazon" => Ok(ProviderId::AmazonBedrock), // alias + "chatgpt" => Ok(ProviderId::ChatGPT), "digitalocean" => Ok(ProviderId::DigitalOcean), "do" => Ok(ProviderId::DigitalOcean), // alias "do_ai" => Ok(ProviderId::DigitalOcean), // alias @@ -99,6 +101,7 @@ impl ProviderId { ProviderId::Moonshotai => "moonshotai", ProviderId::Zhipu => "z-ai", ProviderId::Qwen => "qwen", + ProviderId::ChatGPT => "chatgpt", ProviderId::DigitalOcean => "digitalocean", _ => return Vec::new(), }; @@ -154,6 +157,7 @@ impl ProviderId { | ProviderId::Moonshotai | ProviderId::Zhipu | ProviderId::Qwen + | ProviderId::ChatGPT | ProviderId::DigitalOcean, SupportedAPIsFromClient::AnthropicMessagesAPI(_), ) => SupportedUpstreamAPIs::OpenAIChatCompletions(OpenAIApi::ChatCompletions), @@ -174,13 +178,14 @@ impl ProviderId { | ProviderId::Moonshotai | ProviderId::Zhipu | ProviderId::Qwen + | ProviderId::ChatGPT | ProviderId::DigitalOcean, SupportedAPIsFromClient::OpenAIChatCompletions(_), ) => SupportedUpstreamAPIs::OpenAIChatCompletions(OpenAIApi::ChatCompletions), - // OpenAI Responses API - OpenAI and xAI support this natively + // OpenAI Responses API - OpenAI, xAI, and ChatGPT support this natively ( - ProviderId::OpenAI | ProviderId::XAI, + ProviderId::OpenAI | ProviderId::XAI | ProviderId::ChatGPT, SupportedAPIsFromClient::OpenAIResponsesAPI(_), ) => SupportedUpstreamAPIs::OpenAIResponsesAPI(OpenAIApi::Responses), @@ -241,6 +246,7 @@ impl Display for ProviderId { ProviderId::Zhipu => write!(f, 
"zhipu"), ProviderId::Qwen => write!(f, "qwen"), ProviderId::AmazonBedrock => write!(f, "amazon_bedrock"), + ProviderId::ChatGPT => write!(f, "chatgpt"), ProviderId::DigitalOcean => write!(f, "digitalocean"), } } diff --git a/crates/hermesllm/src/providers/request.rs b/crates/hermesllm/src/providers/request.rs index 92688133..aa100a17 100644 --- a/crates/hermesllm/src/providers/request.rs +++ b/crates/hermesllm/src/providers/request.rs @@ -77,7 +77,7 @@ impl ProviderRequestType { &mut self, provider_id: ProviderId, upstream_api: &SupportedUpstreamAPIs, - ) { + ) -> Result<(), ProviderRequestError> { if provider_id == ProviderId::XAI && matches!( upstream_api, @@ -89,6 +89,48 @@ impl ProviderRequestType { req.web_search_options = None; } } + + // ChatGPT requires instructions, store=false, and input as a list + if provider_id == ProviderId::ChatGPT { + if let Self::ResponsesAPIRequest(req) = self { + use crate::apis::openai_responses::{ + InputItem, InputMessage, InputParam, MessageContent, MessageRole, + }; + + const CHATGPT_BASE_INSTRUCTIONS: &str = + "You are Codex, based on GPT-5. You are running as a coding agent in the Codex CLI on a user's computer."; + match &req.instructions { + Some(existing) if existing.contains(CHATGPT_BASE_INSTRUCTIONS) => {} + Some(existing) => { + req.instructions = + Some(format!("{}\n\n{}", CHATGPT_BASE_INSTRUCTIONS, existing)); + } + None => { + req.instructions = Some(CHATGPT_BASE_INSTRUCTIONS.to_string()); + } + } + req.store = Some(false); + if req.stream == Some(false) { + return Err(ProviderRequestError { + message: "Non-streaming requests are not supported for the ChatGPT Codex provider. 
Set stream=true or omit the stream field.".to_string(), + source: None, + }); + } + req.stream = Some(true); + + // ChatGPT backend requires input to be a list, not a plain string + if let InputParam::Text(text) = &req.input { + req.input = InputParam::Items(vec![InputItem::Message(InputMessage { + role: MessageRole::User, + content: MessageContent::Text(text.clone()), + })]); + } + if let InputParam::SingleItem(item) = &req.input { + req.input = InputParam::Items(vec![item.clone()]); + } + } + } + Ok(()) } } @@ -824,10 +866,12 @@ mod tests { ..Default::default() }); - request.normalize_for_upstream( - ProviderId::XAI, - &SupportedUpstreamAPIs::OpenAIChatCompletions(OpenAIApi::ChatCompletions), - ); + request + .normalize_for_upstream( + ProviderId::XAI, + &SupportedUpstreamAPIs::OpenAIChatCompletions(OpenAIApi::ChatCompletions), + ) + .unwrap(); let ProviderRequestType::ChatCompletionsRequest(req) = request else { panic!("expected chat request"); @@ -852,10 +896,12 @@ mod tests { ..Default::default() }); - request.normalize_for_upstream( - ProviderId::OpenAI, - &SupportedUpstreamAPIs::OpenAIChatCompletions(OpenAIApi::ChatCompletions), - ); + request + .normalize_for_upstream( + ProviderId::OpenAI, + &SupportedUpstreamAPIs::OpenAIChatCompletions(OpenAIApi::ChatCompletions), + ) + .unwrap(); let ProviderRequestType::ChatCompletionsRequest(req) = request else { panic!("expected chat request"); diff --git a/crates/llm_gateway/src/stream_context.rs b/crates/llm_gateway/src/stream_context.rs index e7763ee0..fa9964dd 100644 --- a/crates/llm_gateway/src/stream_context.rs +++ b/crates/llm_gateway/src/stream_context.rs @@ -241,6 +241,14 @@ impl StreamContext { } } + // Apply any extra headers configured on the provider (e.g., ChatGPT-Account-Id, originator) + let headers = self.llm_provider().headers.clone(); + if let Some(headers) = headers { + for (key, value) in &headers { + self.set_http_request_header(key, Some(value)); + } + } + Ok(()) } @@ -1060,7 +1068,20 @@ impl 
HttpContext for StreamContext { match ProviderRequestType::try_from((deserialized_client_request, upstream)) { Ok(mut request) => { - request.normalize_for_upstream(self.get_provider_id(), upstream); + if let Err(e) = + request.normalize_for_upstream(self.get_provider_id(), upstream) + { + warn!( + "request_id={}: normalize_for_upstream failed: {}", + self.request_identifier(), + e + ); + self.send_server_error( + ServerError::LogicError(e.message), + Some(StatusCode::BAD_REQUEST), + ); + return Action::Pause; + } debug!( "request_id={}: upstream request payload: {}", self.request_identifier(), diff --git a/demos/llm_routing/chatgpt_subscription/README.md b/demos/llm_routing/chatgpt_subscription/README.md new file mode 100644 index 00000000..d091155a --- /dev/null +++ b/demos/llm_routing/chatgpt_subscription/README.md @@ -0,0 +1,61 @@ +# ChatGPT Subscription Routing + +Route requests through your ChatGPT Plus/Pro subscription using Plano. Uses the OpenAI Responses API under the hood, targeting `chatgpt.com/backend-api/codex/responses`. + +## Setup + +### 1. Authenticate with ChatGPT + +```bash +planoai chatgpt login +``` + +This opens a device code flow — visit the URL shown and enter the code. Tokens are saved to `~/.plano/chatgpt/auth.json`. + +### 2. Start Plano + +```bash +planoai up config.yaml +``` + +### 3. Send a request + +```bash +curl http://localhost:12000/v1/responses \ + -H "Content-Type: application/json" \ + -d '{ + "model": "gpt-5.2", + "input": "Hello, what model are you?" 
def run_chat(model):
    """Run an interactive chat loop against *model* through Plano.

    Reads prompts from stdin, sends the whole conversation so far via the
    Responses API with streaming enabled, echoes text deltas as they arrive
    and appends the assembled reply to the conversation. Blank prompts are
    ignored; Ctrl+C or end-of-input ends the session.
    """
    print(f"Chatting with {model} via Plano (Ctrl+C to quit)\n")
    transcript = []
    while True:
        try:
            prompt = input("you> ")
        except (KeyboardInterrupt, EOFError):
            print("\nbye")
            return
        if not prompt.strip():
            continue

        transcript.append({"role": "user", "content": prompt})

        events = client.responses.create(model=model, input=transcript, stream=True)
        print(f"{model}> ", end="", flush=True)
        pieces = []
        for event in events:
            # Only text deltas contribute to the visible reply.
            if event.type != "response.output_text.delta":
                continue
            print(event.delta, end="", flush=True)
            pieces.append(event.delta)
        print()

        transcript.append({"role": "assistant", "content": "".join(pieces)})
b/demos/llm_routing/chatgpt_subscription/config.yaml new file mode 100644 index 00000000..a7137b3d --- /dev/null +++ b/demos/llm_routing/chatgpt_subscription/config.yaml @@ -0,0 +1,9 @@ +version: v0.3.0 + +listeners: + - type: model + name: model_listener + port: 12000 + +model_providers: + - model: chatgpt/* diff --git a/demos/llm_routing/chatgpt_subscription/test_chatgpt.sh b/demos/llm_routing/chatgpt_subscription/test_chatgpt.sh new file mode 100755 index 00000000..5544049d --- /dev/null +++ b/demos/llm_routing/chatgpt_subscription/test_chatgpt.sh @@ -0,0 +1,18 @@ +#!/bin/bash +# Test ChatGPT subscription routing through Plano +# Prerequisites: planoai chatgpt login && planoai up config.yaml + +set -e + +echo "Testing ChatGPT subscription via Plano Responses API..." +echo "" + +curl -s http://localhost:12000/v1/responses \ + -H "Content-Type: application/json" \ + -d '{ + "model": "gpt-5.2", + "input": "What is 2 + 2? Reply in one word." + }' | python3 -m json.tool + +echo "" +echo "Done."