Introduce hermesllm library to handle llm message translation (#501)

2026-06-26 15:39:40 +02:00 · 2025-06-10 12:53:27 -07:00 · 2025-06-10 12:53:27 -07:00 · 6c53510f49
commit 6c53510f49
parent 96b583c819
33 changed files with 1693 additions and 690 deletions
--- a/crates/common/src/configuration.rs
+++ b/crates/common/src/configuration.rs
@ -1,3 +1,4 @@
+use hermesllm::providers::openai::types::{ModelDetail, ModelObject, Models};
 use serde::{Deserialize, Serialize};
 use std::collections::HashMap;
 use std::fmt::Display;
@ -206,6 +207,29 @@ pub struct LlmProvider {
    pub usage: Option<String>,
 }

+/// Conversion of a value into the OpenAI-style [`Models`] listing.
+pub trait IntoModels {
+    /// Consumes `self` and returns the corresponding [`Models`] value.
+    fn into_models(self) -> Models;
+}
+
+impl IntoModels for Vec<LlmProvider> {
+    /// Builds one [`ModelDetail`] per provider, using the provider's `name` as the model `id`.
+    ///
+    /// `object`, `created` and `owned_by` are filled with the fixed values `"model"`, `0`
+    /// and `"system"`; the resulting list is tagged with [`ModelObject::List`].
+    fn into_models(self) -> Models {
+        // `self` is owned here, so move each provider's name out instead of cloning it.
+        let data = self
+            .into_iter()
+            .map(|provider| ModelDetail {
+                id: provider.name,
+                object: "model".to_string(),
+                created: 0,
+                owned_by: "system".to_string(),
+            })
+            .collect();
+
+        Models {
+            object: ModelObject::List,
+            data,
+        }
+    }
+}
+
 impl Default for LlmProvider {
    fn default() -> Self {
        Self {
--- a/crates/common/src/errors.rs
+++ b/crates/common/src/errors.rs
@ -1,6 +1,7 @@
 use proxy_wasm::types::Status;

 use crate::{api::open_ai::ChatCompletionChunkResponseError, ratelimit};
+use hermesllm::providers::openai::types::OpenAIError;

 #[derive(thiserror::Error, Debug)]
 pub enum ClientError {
@ -39,4 +40,6 @@ pub enum ServerError {
    BadRequest { why: String },
    #[error("error in streaming response")]
    Streaming(#[from] ChatCompletionChunkResponseError),
+    #[error("error parsing openai message: {0}")]
+    OpenAIPError(#[from] OpenAIError),
 }
--- a/crates/common/src/tokenizer.rs
+++ b/crates/common/src/tokenizer.rs
@ -14,7 +14,7 @@ pub fn token_count(model_name: &str, text: &str) -> Result<usize, String> {
            );
            "gpt-4"
        }
-        true => model_name
+        true => model_name,
    };

    // Consideration: is it more expensive to instantiate the BPE object every time, or to contend the singleton?