This commit is contained in:
Ahmed Ibrahim
2025-11-13 17:39:07 -08:00
parent ab1287cdb0
commit df640801f2
3 changed files with 14 additions and 3 deletions

1
codex-rs/Cargo.lock generated
View File

@@ -1553,6 +1553,7 @@ dependencies = [
"thiserror 2.0.17",
"tiktoken-rs",
"tokio",
"tracing",
]
[[package]]

View File

@@ -11,6 +11,7 @@ anyhow = { workspace = true }
thiserror = { workspace = true }
tiktoken-rs = "0.7"
tokio.workspace = true
tracing = { workspace = true }
[dev-dependencies]
pretty_assertions = { workspace = true }

View File

@@ -7,6 +7,9 @@ use anyhow::Context;
use anyhow::Error as AnyhowError;
use thiserror::Error;
use tiktoken_rs::CoreBPE;
use tracing::error;
/// Process-wide slot holding the outcome of the one-time default tokenizer
/// initialization: either a shared `Arc<Tokenizer>` or the `TokenizerError`
/// produced by the attempt. Filled lazily by `shared_default_tokenizer`.
static DEFAULT_TOKENIZER: OnceLock<Result<Arc<Tokenizer>, TokenizerError>> = OnceLock::new();
/// Supported local encodings.
#[derive(Debug, Copy, Clone, Eq, PartialEq)]
@@ -110,8 +113,6 @@ impl Tokenizer {
}
}
static DEFAULT_TOKENIZER: OnceLock<Result<Arc<Tokenizer>, TokenizerError>> = OnceLock::new();
pub fn warm_up_default_tokenizer() {
tokio::spawn(tokio::time::timeout(Duration::from_secs(5), async {
let _ = shared_default_tokenizer();
@@ -123,12 +124,20 @@ pub fn warm_up_default_tokenizer() {
/// Returns a shared handle to the default tokenizer, or `None` if its
/// initialization failed.
///
/// The outcome of the first initialization attempt is stored in
/// `DEFAULT_TOKENIZER` as a `Result`, so both success and failure are
/// remembered; subsequent calls reuse the stored value and return a clone of
/// the `Arc` on success.
#[must_use]
pub fn shared_default_tokenizer() -> Option<Arc<Tokenizer>> {
DEFAULT_TOKENIZER
// NOTE(review): this closure form and the `init_default_tokenizer` call on the
// next line both appear in this diff hunk; presumably this line is the removed
// (pre-change) version and only one `get_or_init` call is live — confirm.
.get_or_init(|| Tokenizer::try_default().map(Arc::new))
.get_or_init(init_default_tokenizer)
// Borrow the stored `Result`, discard the error side, and clone the `Arc`.
.as_ref()
.ok()
.cloned()
}
/// Builds the default tokenizer and wraps it in an `Arc`.
///
/// On failure the error is reported through `tracing::error!` and then
/// returned unchanged to the caller.
fn init_default_tokenizer() -> Result<Arc<Tokenizer>, TokenizerError> {
    match Tokenizer::try_default() {
        Ok(tokenizer) => Ok(Arc::new(tokenizer)),
        Err(error) => {
            // Log before handing the error back so the failure is visible
            // even when the caller only keeps the `Ok` side.
            error!("failed to initialize default tokenizer: {error}");
            Err(error)
        }
    }
}
#[cfg(test)]
mod tests {
use super::*;