Add support for streaming and fix a few issues (see description) (#202)

This commit is contained in:
José Ulises Niño Rivera 2024-10-28 20:05:06 -04:00 committed by GitHub
parent 29ff8da60f
commit 662a840ac5
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
45 changed files with 2266 additions and 477 deletions

View file

@ -1,17 +1,19 @@
use log::debug;
/// Errors that [`token_count`] can return.
///
/// NOTE(review): this listing appears to be a commit diff rendered without +/- markers,
/// so the pre-change and post-change forms of the derive line and of `UnknownModel`
/// both show up below — confirm against the committed file.
#[derive(Debug, PartialEq, Eq)]
#[derive(thiserror::Error, Debug, PartialEq, Eq)]
#[allow(dead_code)]
pub enum Error {
UnknownModel,
// Not constructed anywhere in the visible code.
FailedToTokenize,
// Carries the model name that was requested; it is interpolated into the error message.
#[error("Unknown model: {model_name}")]
UnknownModel { model_name: String },
}
/// Returns how many tokens `text` encodes to using the BPE looked up for `model_name`.
///
/// # Errors
///
/// Returns `Error::UnknownModel` when `tiktoken_rs::get_bpe_from_model` fails for
/// `model_name`; the underlying error value is discarded.
#[allow(dead_code)]
pub fn token_count(model_name: &str, text: &str) -> Result<usize, Error> {
debug!("getting token count model={}", model_name);
// Consideration: is it more expensive to instantiate the BPE object every time, or to contend the singleton?
// NOTE(review): the two `let bpe` statements below look like the before/after of a single
// changed statement in a marker-less diff (unit variant vs. variant carrying the model
// name) — confirm against the committed file.
let bpe = tiktoken_rs::get_bpe_from_model(model_name).map_err(|_| Error::UnknownModel)?;
let bpe = tiktoken_rs::get_bpe_from_model(model_name).map_err(|_| Error::UnknownModel {
model_name: model_name.to_string(),
})?;
// The token count is the length of the encoded output.
Ok(bpe.encode_ordinary(text).len())
}
@ -32,7 +34,9 @@ mod test {
// Checks that asking for the token count of the model name "unknown" yields
// `Error::UnknownModel`.
// NOTE(review): both the unit form and the `model_name`-carrying form of the expected
// value appear below, which looks like before/after lines of a marker-less diff — confirm.
#[test]
fn unrecognized_model() {
assert_eq!(
Error::UnknownModel,
Error::UnknownModel {
model_name: "unknown".to_string()
},
// `expect_err` panics with "unknown model" if the call unexpectedly succeeds.
token_count("unknown", "").expect_err("unknown model")
)
}