adding support for claude code routing (#575)

* fixes for claude code routing. first commit

* removing redundant enum tags for cache_control

* making sure that claude code can run via the archgw cli

* fixing broken config

* adding a README.md and updating the cli to use more of our defined patterns for params

* fixed config.yaml

* minor fixes to make sure the PR is clean. Ready to ship

* adding claude-sonnet-4-5 to the config

* fixes based on PR feedback

* fixed alias for README

* fixed 400 error handling tests, now that we set temperature to 1.0 for GPT-5

---------

Co-authored-by: Salman Paracha <salmanparacha@MacBook-Pro-257.local>
Co-authored-by: Salman Paracha <salmanparacha@MacBook-Pro-288.local>
Salman Paracha 2025-09-29 19:23:08 -07:00 committed by GitHub
parent 03c2cf6f0d
commit f00870dccb
16 changed files with 903 additions and 106 deletions



@@ -88,6 +88,7 @@ pub struct ChatCompletionsRequest {
pub prediction: Option<StaticContent>,
// pub reasoning_effect: Option<bool>, // GOOD FIRST ISSUE: Future support for reasoning effects
pub response_format: Option<Value>,
pub reasoning_effort: Option<String>, // e.g., "none", "low", "medium", "high"
// pub safety_identifier: Option<String>, // GOOD FIRST ISSUE: Future support for safety identifiers
pub seed: Option<i32>,
pub service_tier: Option<String>,
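
The new `reasoning_effort` field is a plain `Option<String>`, so clients pass the effort level through as a JSON string and no enum mapping happens on the gateway side. A minimal sketch of how such a request deserializes, assuming the standard OpenAI-style chat completions payload (the message content and effort value below are illustrative):

```rust
// Sketch only: relies on the serde derives of ChatCompletionsRequest shown above;
// the payload follows the usual OpenAI chat completions shape.
let body = br#"{
    "model": "gpt-5",
    "messages": [{"role": "user", "content": "hello"}],
    "reasoning_effort": "high"
}"#;
let req: ChatCompletionsRequest = serde_json::from_slice(body.as_slice()).expect("valid request");
assert_eq!(req.reasoning_effort.as_deref(), Some("high"));
```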
@@ -116,6 +117,13 @@ impl ChatCompletionsRequest {
self.max_tokens = None;
}
}
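// GPT-5 models only accept the default temperature, so force it to 1.0 before forwarding.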
pub fn fix_temperature_if_gpt5(&mut self) {
let model = self.model.as_str();
if model.starts_with("gpt-5") {
self.temperature = Some(1.0);
}
}
}
// ============================================================================
@@ -598,6 +606,7 @@ impl TryFrom<&[u8]> for ChatCompletionsRequest {
let mut req: ChatCompletionsRequest = serde_json::from_slice(bytes).map_err(OpenAIStreamError::from)?;
// Use the centralized suppression logic
req.suppress_max_tokens_if_o3();
req.fix_temperature_if_gpt5();
Ok(req)
}
}
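
Because both normalizations run inside `TryFrom<&[u8]>`, any request parsed off the wire is already adjusted before it is forwarded upstream. A minimal end-to-end sketch, again assuming the standard OpenAI-style payload (the concrete values are illustrative):

```rust
// Sketch only: exercises the TryFrom<&[u8]> path shown above.
#[test]
fn gpt5_temperature_is_pinned_during_parsing() {
    let body = br#"{
        "model": "gpt-5",
        "messages": [{"role": "user", "content": "hello"}],
        "temperature": 0.2
    }"#;

    let req = ChatCompletionsRequest::try_from(body.as_slice()).expect("valid request");

    // fix_temperature_if_gpt5() overrides the client-supplied 0.2; non-GPT-5
    // models keep whatever temperature the client sent.
    assert_eq!(req.temperature, Some(1.0));
}
```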