extract models manager and related ownership from core (#16508)

## Summary
- split `models-manager` out of `core` and add `ModelsManagerConfig`
plus `Config::to_models_manager_config()` so model metadata paths stop
depending on `core::Config`
- move login-owned/auth-owned code out of `core` into `codex-login`,
move model provider config into `codex-model-provider-info`, move API
bridge mapping into `codex-api`, move protocol-owned types/impls into
`codex-protocol`, and move response debug helpers into a dedicated
`response-debug-context` crate
- move feedback tag emission into `codex-feedback`, relocate tests to
the crates that now own the code, and keep broad temporary re-exports so
this PR avoids a giant import-only rewrite

## Major moves and decisions
- created `codex-models-manager` as the owner for model
cache/catalog/config/model info logic, including the new
`ModelsManagerConfig` struct
- created `codex-model-provider-info` as the owner for provider config
parsing/defaults and kept temporary `codex-login`/`codex-core`
re-exports for old import paths
- moved `api_bridge` error mapping + `CoreAuthProvider` into
`codex-api`, while `codex-login::api_bridge` temporarily re-exports
those symbols and keeps the `auth_provider_from_auth` wrapper
- moved `auth_env_telemetry` and `provider_auth` ownership to
`codex-login`
- moved `CodexErr` ownership to `codex-protocol::error`, plus
`StreamOutput`, `bytes_to_string_smart`, and network policy helpers to
protocol-owned modules
- created `codex-response-debug-context` for
`extract_response_debug_context`, `telemetry_transport_error_message`,
and related response-debug plumbing instead of leaving that behavior in
`core`
- moved `FeedbackRequestTags`, `emit_feedback_request_tags`, and
`emit_feedback_request_tags_with_auth_env` to `codex-feedback`
- deferred removal of temporary re-exports and the mechanical import
rewrites to a stacked follow-up PR so this PR stays reviewable

## Test moves
- moved auth refresh coverage from `core/tests/suite/auth_refresh.rs` to
`login/tests/suite/auth_refresh.rs`
- moved text encoding coverage from
`core/tests/suite/text_encoding_fix.rs` to
`protocol/src/exec_output_tests.rs`
- moved model info override coverage from
`core/tests/suite/model_info_overrides.rs` to
`models-manager/src/model_info_overrides_tests.rs`

---------

Co-authored-by: Codex <noreply@openai.com>
This commit is contained in:
Ahmed Ibrahim
2026-04-02 23:00:02 -07:00
committed by GitHub
parent 8cd7f20b48
commit 6fff9955f1
99 changed files with 1095 additions and 1137 deletions

View File

@@ -0,0 +1,116 @@
use serde::Deserialize;
use serde::Serialize;
use thiserror::Error;
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
#[serde(untagged)]
pub enum PlanType {
Known(KnownPlan),
Unknown(String),
}
impl PlanType {
pub fn from_raw_value(raw: &str) -> Self {
match raw.to_ascii_lowercase().as_str() {
"free" => Self::Known(KnownPlan::Free),
"go" => Self::Known(KnownPlan::Go),
"plus" => Self::Known(KnownPlan::Plus),
"pro" => Self::Known(KnownPlan::Pro),
"team" => Self::Known(KnownPlan::Team),
"self_serve_business_usage_based" => {
Self::Known(KnownPlan::SelfServeBusinessUsageBased)
}
"business" => Self::Known(KnownPlan::Business),
"enterprise_cbp_usage_based" => Self::Known(KnownPlan::EnterpriseCbpUsageBased),
"enterprise" | "hc" => Self::Known(KnownPlan::Enterprise),
"education" | "edu" => Self::Known(KnownPlan::Edu),
_ => Self::Unknown(raw.to_string()),
}
}
}
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
#[serde(rename_all = "lowercase")]
pub enum KnownPlan {
Free,
Go,
Plus,
Pro,
Team,
#[serde(rename = "self_serve_business_usage_based")]
SelfServeBusinessUsageBased,
Business,
#[serde(rename = "enterprise_cbp_usage_based")]
EnterpriseCbpUsageBased,
#[serde(alias = "hc")]
Enterprise,
Edu,
}
impl KnownPlan {
pub fn display_name(self) -> &'static str {
match self {
Self::Free => "Free",
Self::Go => "Go",
Self::Plus => "Plus",
Self::Pro => "Pro",
Self::Team => "Team",
Self::SelfServeBusinessUsageBased => "Self Serve Business Usage Based",
Self::Business => "Business",
Self::EnterpriseCbpUsageBased => "Enterprise CBP Usage Based",
Self::Enterprise => "Enterprise",
Self::Edu => "Edu",
}
}
pub fn raw_value(self) -> &'static str {
match self {
Self::Free => "free",
Self::Go => "go",
Self::Plus => "plus",
Self::Pro => "pro",
Self::Team => "team",
Self::SelfServeBusinessUsageBased => "self_serve_business_usage_based",
Self::Business => "business",
Self::EnterpriseCbpUsageBased => "enterprise_cbp_usage_based",
Self::Enterprise => "enterprise",
Self::Edu => "edu",
}
}
pub fn is_workspace_account(self) -> bool {
matches!(
self,
Self::Team
| Self::SelfServeBusinessUsageBased
| Self::Business
| Self::EnterpriseCbpUsageBased
| Self::Enterprise
| Self::Edu
)
}
}
#[derive(Debug, Clone, PartialEq, Eq, Error)]
#[error("{message}")]
pub struct RefreshTokenFailedError {
pub reason: RefreshTokenFailedReason,
pub message: String,
}
impl RefreshTokenFailedError {
pub fn new(reason: RefreshTokenFailedReason, message: impl Into<String>) -> Self {
Self {
reason,
message: message.into(),
}
}
}
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum RefreshTokenFailedReason {
Expired,
Exhausted,
Revoked,
Other,
}

View File

@@ -0,0 +1,631 @@
use crate::ThreadId;
use crate::auth::KnownPlan;
use crate::auth::PlanType;
pub use crate::auth::RefreshTokenFailedError;
pub use crate::auth::RefreshTokenFailedReason;
use crate::exec_output::ExecToolCallOutput;
use crate::network_policy::NetworkPolicyDecisionPayload;
use crate::protocol::CodexErrorInfo;
use crate::protocol::ErrorEvent;
use crate::protocol::RateLimitSnapshot;
use crate::protocol::TruncationPolicy;
use chrono::DateTime;
use chrono::Datelike;
use chrono::Local;
use chrono::Utc;
use codex_async_utils::CancelErr;
use codex_utils_string::truncate_middle_chars;
use codex_utils_string::truncate_middle_with_token_budget;
use reqwest::StatusCode;
use serde_json;
use std::io;
use std::time::Duration;
use thiserror::Error;
use tokio::task::JoinError;
pub type Result<T> = std::result::Result<T, CodexErr>;
/// Limit UI error messages to a reasonable size while keeping useful context.
const ERROR_MESSAGE_UI_MAX_BYTES: usize = 2 * 1024;
#[derive(Error, Debug)]
pub enum SandboxErr {
/// Error from sandbox execution
#[error(
"sandbox denied exec error, exit code: {}, stdout: {}, stderr: {}",
.output.exit_code, .output.stdout.text, .output.stderr.text
)]
Denied {
output: Box<ExecToolCallOutput>,
network_policy_decision: Option<NetworkPolicyDecisionPayload>,
},
/// Error from linux seccomp filter setup
#[cfg(target_os = "linux")]
#[error("seccomp setup error")]
SeccompInstall(#[from] seccompiler::Error),
/// Error from linux seccomp backend
#[cfg(target_os = "linux")]
#[error("seccomp backend error")]
SeccompBackend(#[from] seccompiler::BackendError),
/// Command timed out
#[error("command timed out")]
Timeout { output: Box<ExecToolCallOutput> },
/// Command was killed by a signal
#[error("command was killed by a signal")]
Signal(i32),
/// Error from linux landlock
#[error("Landlock was not able to fully enforce all sandbox rules")]
LandlockRestrict,
}
#[derive(Error, Debug)]
pub enum CodexErr {
#[error("turn aborted. Something went wrong? Hit `/feedback` to report the issue.")]
TurnAborted,
/// Returned by ResponsesClient when the SSE stream disconnects or errors out **after** the HTTP
/// handshake has succeeded but **before** it finished emitting `response.completed`.
///
/// The Session loop treats this as a transient error and will automatically retry the turn.
///
/// Optionally includes the requested delay before retrying the turn.
#[error("stream disconnected before completion: {0}")]
Stream(String, Option<Duration>),
#[error(
"Codex ran out of room in the model's context window. Start a new thread or clear earlier history before retrying."
)]
ContextWindowExceeded,
#[error("no thread with id: {0}")]
ThreadNotFound(ThreadId),
#[error("agent thread limit reached (max {max_threads})")]
AgentLimitReached { max_threads: usize },
#[error("session configured event was not the first event in the stream")]
SessionConfiguredNotFirstEvent,
/// Returned by run_command_stream when the spawned child process timed out (10s).
#[error("timeout waiting for child process to exit")]
Timeout,
/// Returned by run_command_stream when the child could not be spawned (its stdout/stderr pipes
/// could not be captured). Analogous to the previous `CodexError::Spawn` variant.
#[error("spawn failed: child stdout/stderr not captured")]
Spawn,
/// Returned by run_command_stream when the user pressed Ctrl-C (SIGINT). Session uses this to
/// surface a polite FunctionCallOutput back to the model instead of crashing the CLI.
#[error("interrupted (Ctrl-C). Something went wrong? Hit `/feedback` to report the issue.")]
Interrupted,
/// Unexpected HTTP status code.
#[error("{0}")]
UnexpectedStatus(UnexpectedResponseError),
/// Invalid request.
#[error("{0}")]
InvalidRequest(String),
/// Invalid image.
#[error("Image poisoning")]
InvalidImageRequest(),
#[error("{0}")]
UsageLimitReached(UsageLimitReachedError),
#[error("Selected model is at capacity. Please try a different model.")]
ServerOverloaded,
#[error("{0}")]
ResponseStreamFailed(ResponseStreamFailed),
#[error("{0}")]
ConnectionFailed(ConnectionFailedError),
#[error("Quota exceeded. Check your plan and billing details.")]
QuotaExceeded,
#[error(
"To use Codex with your ChatGPT plan, upgrade to Plus: https://chatgpt.com/explore/plus."
)]
UsageNotIncluded,
#[error("We're currently experiencing high demand, which may cause temporary errors.")]
InternalServerError,
/// Retry limit exceeded.
#[error("{0}")]
RetryLimit(RetryLimitReachedError),
/// Agent loop died unexpectedly
#[error("internal error; agent loop died unexpectedly")]
InternalAgentDied,
/// Sandbox error
#[error("sandbox error: {0}")]
Sandbox(#[from] SandboxErr),
#[error("codex-linux-sandbox was required but not provided")]
LandlockSandboxExecutableNotProvided,
#[error("unsupported operation: {0}")]
UnsupportedOperation(String),
#[error("{0}")]
RefreshTokenFailed(RefreshTokenFailedError),
#[error("Fatal error: {0}")]
Fatal(String),
// -----------------------------------------------------------------
// Automatic conversions for common external error types
// -----------------------------------------------------------------
#[error(transparent)]
Io(#[from] io::Error),
#[error(transparent)]
Json(#[from] serde_json::Error),
#[cfg(target_os = "linux")]
#[error(transparent)]
LandlockRuleset(#[from] landlock::RulesetError),
#[cfg(target_os = "linux")]
#[error(transparent)]
LandlockPathFd(#[from] landlock::PathFdError),
#[error(transparent)]
TokioJoin(#[from] JoinError),
#[error("{0}")]
EnvVar(EnvVarError),
}
impl From<CancelErr> for CodexErr {
fn from(_: CancelErr) -> Self {
CodexErr::TurnAborted
}
}
impl CodexErr {
pub fn is_retryable(&self) -> bool {
match self {
CodexErr::TurnAborted
| CodexErr::Interrupted
| CodexErr::EnvVar(_)
| CodexErr::Fatal(_)
| CodexErr::UsageNotIncluded
| CodexErr::QuotaExceeded
| CodexErr::InvalidImageRequest()
| CodexErr::InvalidRequest(_)
| CodexErr::RefreshTokenFailed(_)
| CodexErr::UnsupportedOperation(_)
| CodexErr::Sandbox(_)
| CodexErr::LandlockSandboxExecutableNotProvided
| CodexErr::RetryLimit(_)
| CodexErr::ContextWindowExceeded
| CodexErr::ThreadNotFound(_)
| CodexErr::AgentLimitReached { .. }
| CodexErr::Spawn
| CodexErr::SessionConfiguredNotFirstEvent
| CodexErr::UsageLimitReached(_)
| CodexErr::ServerOverloaded => false,
CodexErr::Stream(..)
| CodexErr::Timeout
| CodexErr::UnexpectedStatus(_)
| CodexErr::ResponseStreamFailed(_)
| CodexErr::ConnectionFailed(_)
| CodexErr::InternalServerError
| CodexErr::InternalAgentDied
| CodexErr::Io(_)
| CodexErr::Json(_)
| CodexErr::TokioJoin(_) => true,
#[cfg(target_os = "linux")]
CodexErr::LandlockRuleset(_) | CodexErr::LandlockPathFd(_) => false,
}
}
/// Minimal shim so that existing `e.downcast_ref::<CodexErr>()` checks continue to compile
/// after replacing `anyhow::Error` in the return signature. This mirrors the behavior of
/// `anyhow::Error::downcast_ref` but works directly on our concrete enum.
pub fn downcast_ref<T: std::any::Any>(&self) -> Option<&T> {
(self as &dyn std::any::Any).downcast_ref::<T>()
}
/// Translate core error to client-facing protocol error.
pub fn to_codex_protocol_error(&self) -> CodexErrorInfo {
match self {
CodexErr::ContextWindowExceeded => CodexErrorInfo::ContextWindowExceeded,
CodexErr::UsageLimitReached(_)
| CodexErr::QuotaExceeded
| CodexErr::UsageNotIncluded => CodexErrorInfo::UsageLimitExceeded,
CodexErr::ServerOverloaded => CodexErrorInfo::ServerOverloaded,
CodexErr::RetryLimit(_) => CodexErrorInfo::ResponseTooManyFailedAttempts {
http_status_code: self.http_status_code_value(),
},
CodexErr::ConnectionFailed(_) => CodexErrorInfo::HttpConnectionFailed {
http_status_code: self.http_status_code_value(),
},
CodexErr::ResponseStreamFailed(_) => CodexErrorInfo::ResponseStreamConnectionFailed {
http_status_code: self.http_status_code_value(),
},
CodexErr::RefreshTokenFailed(_) => CodexErrorInfo::Unauthorized,
CodexErr::SessionConfiguredNotFirstEvent
| CodexErr::InternalServerError
| CodexErr::InternalAgentDied => CodexErrorInfo::InternalServerError,
CodexErr::UnsupportedOperation(_)
| CodexErr::ThreadNotFound(_)
| CodexErr::AgentLimitReached { .. } => CodexErrorInfo::BadRequest,
CodexErr::Sandbox(_) => CodexErrorInfo::SandboxError,
_ => CodexErrorInfo::Other,
}
}
pub fn to_error_event(&self, message_prefix: Option<String>) -> ErrorEvent {
let error_message = self.to_string();
let message: String = match message_prefix {
Some(prefix) => format!("{prefix}: {error_message}"),
None => error_message,
};
ErrorEvent {
message,
codex_error_info: Some(self.to_codex_protocol_error()),
}
}
pub fn http_status_code_value(&self) -> Option<u16> {
let http_status_code = match self {
CodexErr::RetryLimit(err) => Some(err.status),
CodexErr::UnexpectedStatus(err) => Some(err.status),
CodexErr::ConnectionFailed(err) => err.source.status(),
CodexErr::ResponseStreamFailed(err) => err.source.status(),
_ => None,
};
http_status_code.as_ref().map(StatusCode::as_u16)
}
}
#[derive(Debug)]
pub struct ConnectionFailedError {
pub source: reqwest::Error,
}
impl std::fmt::Display for ConnectionFailedError {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
write!(f, "Connection failed: {}", self.source)
}
}
#[derive(Debug)]
pub struct ResponseStreamFailed {
pub source: reqwest::Error,
pub request_id: Option<String>,
}
impl std::fmt::Display for ResponseStreamFailed {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
write!(
f,
"Error while reading the server response: {}{}",
self.source,
self.request_id
.as_ref()
.map(|id| format!(", request id: {id}"))
.unwrap_or_default()
)
}
}
#[derive(Debug)]
pub struct UnexpectedResponseError {
pub status: StatusCode,
pub body: String,
pub url: Option<String>,
pub cf_ray: Option<String>,
pub request_id: Option<String>,
pub identity_authorization_error: Option<String>,
pub identity_error_code: Option<String>,
}
const CLOUDFLARE_BLOCKED_MESSAGE: &str =
"Access blocked by Cloudflare. This usually happens when connecting from a restricted region";
const UNEXPECTED_RESPONSE_BODY_MAX_BYTES: usize = 1000;
impl UnexpectedResponseError {
fn display_body(&self) -> String {
if let Some(message) = self.extract_error_message() {
return message;
}
let trimmed_body = self.body.trim();
if trimmed_body.is_empty() {
return "Unknown error".to_string();
}
truncate_with_ellipsis(trimmed_body, UNEXPECTED_RESPONSE_BODY_MAX_BYTES)
}
fn extract_error_message(&self) -> Option<String> {
let json = serde_json::from_str::<serde_json::Value>(&self.body).ok()?;
let message = json
.get("error")
.and_then(|error| error.get("message"))
.and_then(serde_json::Value::as_str)?;
let message = message.trim();
if message.is_empty() {
None
} else {
Some(message.to_string())
}
}
fn friendly_message(&self) -> Option<String> {
if self.status != StatusCode::FORBIDDEN {
return None;
}
if !self.body.contains("Cloudflare") || !self.body.contains("blocked") {
return None;
}
let status = self.status;
let mut message = format!("{CLOUDFLARE_BLOCKED_MESSAGE} (status {status})");
if let Some(url) = &self.url {
message.push_str(&format!(", url: {url}"));
}
if let Some(cf_ray) = &self.cf_ray {
message.push_str(&format!(", cf-ray: {cf_ray}"));
}
if let Some(id) = &self.request_id {
message.push_str(&format!(", request id: {id}"));
}
if let Some(auth_error) = &self.identity_authorization_error {
message.push_str(&format!(", auth error: {auth_error}"));
}
if let Some(error_code) = &self.identity_error_code {
message.push_str(&format!(", auth error code: {error_code}"));
}
Some(message)
}
}
impl std::fmt::Display for UnexpectedResponseError {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
if let Some(friendly) = self.friendly_message() {
write!(f, "{friendly}")
} else {
let status = self.status;
let body = self.display_body();
let mut message = format!("unexpected status {status}: {body}");
if let Some(url) = &self.url {
message.push_str(&format!(", url: {url}"));
}
if let Some(cf_ray) = &self.cf_ray {
message.push_str(&format!(", cf-ray: {cf_ray}"));
}
if let Some(id) = &self.request_id {
message.push_str(&format!(", request id: {id}"));
}
if let Some(auth_error) = &self.identity_authorization_error {
message.push_str(&format!(", auth error: {auth_error}"));
}
if let Some(error_code) = &self.identity_error_code {
message.push_str(&format!(", auth error code: {error_code}"));
}
write!(f, "{message}")
}
}
}
impl std::error::Error for UnexpectedResponseError {}
fn truncate_with_ellipsis(text: &str, max_bytes: usize) -> String {
if text.len() <= max_bytes {
return text.to_string();
}
let mut cut = max_bytes;
while !text.is_char_boundary(cut) {
cut = cut.saturating_sub(1);
}
let mut truncated = text[..cut].to_string();
truncated.push_str("...");
truncated
}
fn truncate_text(content: &str, policy: TruncationPolicy) -> String {
match policy {
TruncationPolicy::Bytes(bytes) => truncate_middle_chars(content, bytes),
TruncationPolicy::Tokens(tokens) => truncate_middle_with_token_budget(content, tokens).0,
}
}
#[derive(Debug)]
pub struct RetryLimitReachedError {
pub status: StatusCode,
pub request_id: Option<String>,
}
impl std::fmt::Display for RetryLimitReachedError {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
write!(
f,
"exceeded retry limit, last status: {}{}",
self.status,
self.request_id
.as_ref()
.map(|id| format!(", request id: {id}"))
.unwrap_or_default()
)
}
}
#[derive(Debug)]
pub struct UsageLimitReachedError {
pub plan_type: Option<PlanType>,
pub resets_at: Option<DateTime<Utc>>,
pub rate_limits: Option<Box<RateLimitSnapshot>>,
pub promo_message: Option<String>,
}
impl std::fmt::Display for UsageLimitReachedError {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
if let Some(limit_name) = self
.rate_limits
.as_ref()
.and_then(|snapshot| snapshot.limit_name.as_deref())
.map(str::trim)
.filter(|name| !name.is_empty())
&& !limit_name.eq_ignore_ascii_case("codex")
{
return write!(
f,
"You've hit your usage limit for {limit_name}. Switch to another model now,{}",
retry_suffix_after_or(self.resets_at.as_ref())
);
}
if let Some(promo_message) = &self.promo_message {
return write!(
f,
"You've hit your usage limit. {promo_message},{}",
retry_suffix_after_or(self.resets_at.as_ref())
);
}
let message = match self.plan_type.as_ref() {
Some(PlanType::Known(KnownPlan::Plus)) => format!(
"You've hit your usage limit. Upgrade to Pro (https://chatgpt.com/explore/pro), visit https://chatgpt.com/codex/settings/usage to purchase more credits{}",
retry_suffix_after_or(self.resets_at.as_ref())
),
Some(PlanType::Known(
KnownPlan::Team
| KnownPlan::SelfServeBusinessUsageBased
| KnownPlan::Business
| KnownPlan::EnterpriseCbpUsageBased,
)) => {
format!(
"You've hit your usage limit. To get more access now, send a request to your admin{}",
retry_suffix_after_or(self.resets_at.as_ref())
)
}
Some(PlanType::Known(KnownPlan::Free)) | Some(PlanType::Known(KnownPlan::Go)) => {
format!(
"You've hit your usage limit. Upgrade to Plus to continue using Codex (https://chatgpt.com/explore/plus),{}",
retry_suffix_after_or(self.resets_at.as_ref())
)
}
Some(PlanType::Known(KnownPlan::Pro)) => format!(
"You've hit your usage limit. Visit https://chatgpt.com/codex/settings/usage to purchase more credits{}",
retry_suffix_after_or(self.resets_at.as_ref())
),
Some(PlanType::Known(KnownPlan::Enterprise))
| Some(PlanType::Known(KnownPlan::Edu)) => format!(
"You've hit your usage limit.{}",
retry_suffix(self.resets_at.as_ref())
),
Some(PlanType::Unknown(_)) | None => format!(
"You've hit your usage limit.{}",
retry_suffix(self.resets_at.as_ref())
),
};
write!(f, "{message}")
}
}
fn retry_suffix(resets_at: Option<&DateTime<Utc>>) -> String {
if let Some(resets_at) = resets_at {
let formatted = format_retry_timestamp(resets_at);
format!(" Try again at {formatted}.")
} else {
" Try again later.".to_string()
}
}
fn retry_suffix_after_or(resets_at: Option<&DateTime<Utc>>) -> String {
if let Some(resets_at) = resets_at {
let formatted = format_retry_timestamp(resets_at);
format!(" or try again at {formatted}.")
} else {
" or try again later.".to_string()
}
}
fn format_retry_timestamp(resets_at: &DateTime<Utc>) -> String {
let local_reset = resets_at.with_timezone(&Local);
let local_now = now_for_retry().with_timezone(&Local);
if local_reset.date_naive() == local_now.date_naive() {
local_reset.format("%-I:%M %p").to_string()
} else {
let suffix = day_suffix(local_reset.day());
local_reset
.format(&format!("%b %-d{suffix}, %Y %-I:%M %p"))
.to_string()
}
}
fn day_suffix(day: u32) -> &'static str {
match day {
11..=13 => "th",
_ => match day % 10 {
1 => "st",
2 => "nd", // codespell:ignore
3 => "rd",
_ => "th",
},
}
}
#[cfg(test)]
thread_local! {
static NOW_OVERRIDE: std::cell::RefCell<Option<DateTime<Utc>>> =
const { std::cell::RefCell::new(None) };
}
fn now_for_retry() -> DateTime<Utc> {
#[cfg(test)]
{
if let Some(now) = NOW_OVERRIDE.with(|cell| *cell.borrow()) {
return now;
}
}
Utc::now()
}
#[derive(Debug)]
pub struct EnvVarError {
/// Name of the environment variable that is missing.
pub var: String,
/// Optional instructions to help the user get a valid value for the
/// variable and set it.
pub instructions: Option<String>,
}
impl std::fmt::Display for EnvVarError {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
write!(f, "Missing environment variable: `{}`.", self.var)?;
if let Some(instructions) = &self.instructions {
write!(f, " {instructions}")?;
}
Ok(())
}
}
pub fn get_error_message_ui(e: &CodexErr) -> String {
let message = match e {
CodexErr::Sandbox(SandboxErr::Denied { output, .. }) => {
let aggregated = output.aggregated_output.text.trim();
if !aggregated.is_empty() {
output.aggregated_output.text.clone()
} else {
let stderr = output.stderr.text.trim();
let stdout = output.stdout.text.trim();
match (stderr.is_empty(), stdout.is_empty()) {
(false, false) => format!("{stderr}\n{stdout}"),
(false, true) => output.stderr.text.clone(),
(true, false) => output.stdout.text.clone(),
(true, true) => format!(
"command failed inside sandbox with exit code {}",
output.exit_code
),
}
}
}
// Timeouts are not sandbox errors from a UX perspective; present them plainly.
CodexErr::Sandbox(SandboxErr::Timeout { output }) => {
format!(
"error: command timed out after {} ms",
output.duration.as_millis()
)
}
_ => e.to_string(),
};
truncate_text(
&message,
TruncationPolicy::Bytes(ERROR_MESSAGE_UI_MAX_BYTES),
)
}
#[cfg(test)]
#[path = "error_tests.rs"]
mod tests;

View File

@@ -0,0 +1,546 @@
use super::*;
use crate::exec_output::StreamOutput;
use crate::protocol::RateLimitWindow;
use chrono::DateTime;
use chrono::Duration as ChronoDuration;
use chrono::TimeZone;
use chrono::Utc;
use http::Response as HttpResponse;
use pretty_assertions::assert_eq;
use reqwest::Response;
use reqwest::ResponseBuilderExt;
use reqwest::StatusCode;
use reqwest::Url;
fn rate_limit_snapshot() -> RateLimitSnapshot {
let primary_reset_at = Utc
.with_ymd_and_hms(2024, 1, 1, 1, 0, 0)
.unwrap()
.timestamp();
let secondary_reset_at = Utc
.with_ymd_and_hms(2024, 1, 1, 2, 0, 0)
.unwrap()
.timestamp();
RateLimitSnapshot {
limit_id: None,
limit_name: None,
primary: Some(RateLimitWindow {
used_percent: 50.0,
window_minutes: Some(60),
resets_at: Some(primary_reset_at),
}),
secondary: Some(RateLimitWindow {
used_percent: 30.0,
window_minutes: Some(120),
resets_at: Some(secondary_reset_at),
}),
credits: None,
plan_type: None,
}
}
fn with_now_override<T>(now: DateTime<Utc>, f: impl FnOnce() -> T) -> T {
NOW_OVERRIDE.with(|cell| {
*cell.borrow_mut() = Some(now);
let result = f();
*cell.borrow_mut() = None;
result
})
}
#[test]
fn usage_limit_reached_error_formats_plus_plan() {
let err = UsageLimitReachedError {
plan_type: Some(PlanType::Known(KnownPlan::Plus)),
resets_at: None,
rate_limits: Some(Box::new(rate_limit_snapshot())),
promo_message: None,
};
assert_eq!(
err.to_string(),
"You've hit your usage limit. Upgrade to Pro (https://chatgpt.com/explore/pro), visit https://chatgpt.com/codex/settings/usage to purchase more credits or try again later."
);
}
#[test]
fn server_overloaded_maps_to_protocol() {
let err = CodexErr::ServerOverloaded;
assert_eq!(
err.to_codex_protocol_error(),
CodexErrorInfo::ServerOverloaded
);
}
#[test]
fn sandbox_denied_uses_aggregated_output_when_stderr_empty() {
let output = ExecToolCallOutput {
exit_code: 77,
stdout: StreamOutput::new(String::new()),
stderr: StreamOutput::new(String::new()),
aggregated_output: StreamOutput::new("aggregate detail".to_string()),
duration: Duration::from_millis(10),
timed_out: false,
};
let err = CodexErr::Sandbox(SandboxErr::Denied {
output: Box::new(output),
network_policy_decision: None,
});
assert_eq!(get_error_message_ui(&err), "aggregate detail");
}
#[test]
fn sandbox_denied_reports_both_streams_when_available() {
let output = ExecToolCallOutput {
exit_code: 9,
stdout: StreamOutput::new("stdout detail".to_string()),
stderr: StreamOutput::new("stderr detail".to_string()),
aggregated_output: StreamOutput::new(String::new()),
duration: Duration::from_millis(10),
timed_out: false,
};
let err = CodexErr::Sandbox(SandboxErr::Denied {
output: Box::new(output),
network_policy_decision: None,
});
assert_eq!(get_error_message_ui(&err), "stderr detail\nstdout detail");
}
#[test]
fn sandbox_denied_reports_stdout_when_no_stderr() {
let output = ExecToolCallOutput {
exit_code: 11,
stdout: StreamOutput::new("stdout only".to_string()),
stderr: StreamOutput::new(String::new()),
aggregated_output: StreamOutput::new(String::new()),
duration: Duration::from_millis(8),
timed_out: false,
};
let err = CodexErr::Sandbox(SandboxErr::Denied {
output: Box::new(output),
network_policy_decision: None,
});
assert_eq!(get_error_message_ui(&err), "stdout only");
}
#[test]
fn to_error_event_handles_response_stream_failed() {
let response = HttpResponse::builder()
.status(StatusCode::TOO_MANY_REQUESTS)
.url(Url::parse("http://example.com").unwrap())
.body("")
.unwrap();
let source = Response::from(response).error_for_status_ref().unwrap_err();
let err = CodexErr::ResponseStreamFailed(ResponseStreamFailed {
source,
request_id: Some("req-123".to_string()),
});
let event = err.to_error_event(Some("prefix".to_string()));
assert_eq!(
event.message,
"prefix: Error while reading the server response: HTTP status client error (429 Too Many Requests) for url (http://example.com/), request id: req-123"
);
assert_eq!(
event.codex_error_info,
Some(CodexErrorInfo::ResponseStreamConnectionFailed {
http_status_code: Some(429)
})
);
}
#[test]
fn sandbox_denied_reports_exit_code_when_no_output_available() {
let output = ExecToolCallOutput {
exit_code: 13,
stdout: StreamOutput::new(String::new()),
stderr: StreamOutput::new(String::new()),
aggregated_output: StreamOutput::new(String::new()),
duration: Duration::from_millis(5),
timed_out: false,
};
let err = CodexErr::Sandbox(SandboxErr::Denied {
output: Box::new(output),
network_policy_decision: None,
});
assert_eq!(
get_error_message_ui(&err),
"command failed inside sandbox with exit code 13"
);
}
#[test]
fn usage_limit_reached_error_formats_free_plan() {
let err = UsageLimitReachedError {
plan_type: Some(PlanType::Known(KnownPlan::Free)),
resets_at: None,
rate_limits: Some(Box::new(rate_limit_snapshot())),
promo_message: None,
};
assert_eq!(
err.to_string(),
"You've hit your usage limit. Upgrade to Plus to continue using Codex (https://chatgpt.com/explore/plus), or try again later."
);
}
#[test]
fn usage_limit_reached_error_formats_go_plan() {
let err = UsageLimitReachedError {
plan_type: Some(PlanType::Known(KnownPlan::Go)),
resets_at: None,
rate_limits: Some(Box::new(rate_limit_snapshot())),
promo_message: None,
};
assert_eq!(
err.to_string(),
"You've hit your usage limit. Upgrade to Plus to continue using Codex (https://chatgpt.com/explore/plus), or try again later."
);
}
#[test]
fn usage_limit_reached_error_formats_default_when_none() {
let err = UsageLimitReachedError {
plan_type: None,
resets_at: None,
rate_limits: Some(Box::new(rate_limit_snapshot())),
promo_message: None,
};
assert_eq!(
err.to_string(),
"You've hit your usage limit. Try again later."
);
}
#[test]
fn usage_limit_reached_error_formats_team_plan() {
let base = Utc.with_ymd_and_hms(2024, 1, 1, 0, 0, 0).unwrap();
let resets_at = base + ChronoDuration::hours(1);
with_now_override(base, move || {
let expected_time = format_retry_timestamp(&resets_at);
let err = UsageLimitReachedError {
plan_type: Some(PlanType::Known(KnownPlan::Team)),
resets_at: Some(resets_at),
rate_limits: Some(Box::new(rate_limit_snapshot())),
promo_message: None,
};
let expected = format!(
"You've hit your usage limit. To get more access now, send a request to your admin or try again at {expected_time}."
);
assert_eq!(err.to_string(), expected);
});
}
#[test]
fn usage_limit_reached_error_formats_business_plan_without_reset() {
let err = UsageLimitReachedError {
plan_type: Some(PlanType::Known(KnownPlan::Business)),
resets_at: None,
rate_limits: Some(Box::new(rate_limit_snapshot())),
promo_message: None,
};
assert_eq!(
err.to_string(),
"You've hit your usage limit. To get more access now, send a request to your admin or try again later."
);
}
#[test]
fn usage_limit_reached_error_formats_self_serve_business_usage_based_plan() {
let err = UsageLimitReachedError {
plan_type: Some(PlanType::Known(KnownPlan::SelfServeBusinessUsageBased)),
resets_at: None,
rate_limits: Some(Box::new(rate_limit_snapshot())),
promo_message: None,
};
assert_eq!(
err.to_string(),
"You've hit your usage limit. To get more access now, send a request to your admin or try again later."
);
}
#[test]
fn usage_limit_reached_error_formats_enterprise_cbp_usage_based_plan() {
let err = UsageLimitReachedError {
plan_type: Some(PlanType::Known(KnownPlan::EnterpriseCbpUsageBased)),
resets_at: None,
rate_limits: Some(Box::new(rate_limit_snapshot())),
promo_message: None,
};
assert_eq!(
err.to_string(),
"You've hit your usage limit. To get more access now, send a request to your admin or try again later."
);
}
#[test]
fn usage_limit_reached_error_formats_default_for_other_plans() {
let err = UsageLimitReachedError {
plan_type: Some(PlanType::Known(KnownPlan::Enterprise)),
resets_at: None,
rate_limits: Some(Box::new(rate_limit_snapshot())),
promo_message: None,
};
assert_eq!(
err.to_string(),
"You've hit your usage limit. Try again later."
);
}
#[test]
fn usage_limit_reached_error_formats_pro_plan_with_reset() {
let base = Utc.with_ymd_and_hms(2024, 1, 1, 0, 0, 0).unwrap();
let resets_at = base + ChronoDuration::hours(1);
with_now_override(base, move || {
let expected_time = format_retry_timestamp(&resets_at);
let err = UsageLimitReachedError {
plan_type: Some(PlanType::Known(KnownPlan::Pro)),
resets_at: Some(resets_at),
rate_limits: Some(Box::new(rate_limit_snapshot())),
promo_message: None,
};
let expected = format!(
"You've hit your usage limit. Visit https://chatgpt.com/codex/settings/usage to purchase more credits or try again at {expected_time}."
);
assert_eq!(err.to_string(), expected);
});
}
#[test]
fn usage_limit_reached_error_hides_upsell_for_non_codex_limit_name() {
let base = Utc.with_ymd_and_hms(2024, 1, 1, 0, 0, 0).unwrap();
let resets_at = base + ChronoDuration::hours(1);
with_now_override(base, move || {
let expected_time = format_retry_timestamp(&resets_at);
let err = UsageLimitReachedError {
plan_type: Some(PlanType::Known(KnownPlan::Plus)),
resets_at: Some(resets_at),
rate_limits: Some(Box::new(RateLimitSnapshot {
limit_id: Some("codex_other".to_string()),
limit_name: Some("codex_other".to_string()),
..rate_limit_snapshot()
})),
promo_message: Some(
"Visit https://chatgpt.com/codex/settings/usage to purchase more credits"
.to_string(),
),
};
let expected = format!(
"You've hit your usage limit for codex_other. Switch to another model now, or try again at {expected_time}."
);
assert_eq!(err.to_string(), expected);
});
}
#[test]
fn usage_limit_reached_includes_minutes_when_available() {
let base = Utc.with_ymd_and_hms(2024, 1, 1, 0, 0, 0).unwrap();
let resets_at = base + ChronoDuration::minutes(5);
with_now_override(base, move || {
let expected_time = format_retry_timestamp(&resets_at);
let err = UsageLimitReachedError {
plan_type: None,
resets_at: Some(resets_at),
rate_limits: Some(Box::new(rate_limit_snapshot())),
promo_message: None,
};
let expected = format!("You've hit your usage limit. Try again at {expected_time}.");
assert_eq!(err.to_string(), expected);
});
}
#[test]
fn unexpected_status_cloudflare_html_is_simplified() {
let err = UnexpectedResponseError {
status: StatusCode::FORBIDDEN,
body: "<html><body>Cloudflare error: Sorry, you have been blocked</body></html>"
.to_string(),
url: Some("http://example.com/blocked".to_string()),
cf_ray: Some("ray-id".to_string()),
request_id: None,
identity_authorization_error: None,
identity_error_code: None,
};
let status = StatusCode::FORBIDDEN.to_string();
let url = "http://example.com/blocked";
assert_eq!(
err.to_string(),
format!("{CLOUDFLARE_BLOCKED_MESSAGE} (status {status}), url: {url}, cf-ray: ray-id")
);
}
#[test]
fn unexpected_status_non_html_is_unchanged() {
let err = UnexpectedResponseError {
status: StatusCode::FORBIDDEN,
body: "plain text error".to_string(),
url: Some("http://example.com/plain".to_string()),
cf_ray: None,
request_id: None,
identity_authorization_error: None,
identity_error_code: None,
};
let status = StatusCode::FORBIDDEN.to_string();
let url = "http://example.com/plain";
assert_eq!(
err.to_string(),
format!("unexpected status {status}: plain text error, url: {url}")
);
}
#[test]
fn unexpected_status_prefers_error_message_when_present() {
let err = UnexpectedResponseError {
status: StatusCode::UNAUTHORIZED,
body: r#"{"error":{"message":"Workspace is not authorized in this region."},"status":401}"#
.to_string(),
url: Some("https://chatgpt.com/backend-api/codex/responses".to_string()),
cf_ray: None,
request_id: Some("req-123".to_string()),
identity_authorization_error: None,
identity_error_code: None,
};
let status = StatusCode::UNAUTHORIZED.to_string();
assert_eq!(
err.to_string(),
format!(
"unexpected status {status}: Workspace is not authorized in this region., url: https://chatgpt.com/backend-api/codex/responses, request id: req-123"
)
);
}
#[test]
fn unexpected_status_truncates_long_body_with_ellipsis() {
let long_body = "x".repeat(UNEXPECTED_RESPONSE_BODY_MAX_BYTES + 10);
let err = UnexpectedResponseError {
status: StatusCode::BAD_GATEWAY,
body: long_body,
url: Some("http://example.com/long".to_string()),
cf_ray: None,
request_id: Some("req-long".to_string()),
identity_authorization_error: None,
identity_error_code: None,
};
let status = StatusCode::BAD_GATEWAY.to_string();
let expected_body = format!("{}...", "x".repeat(UNEXPECTED_RESPONSE_BODY_MAX_BYTES));
assert_eq!(
err.to_string(),
format!(
"unexpected status {status}: {expected_body}, url: http://example.com/long, request id: req-long"
)
);
}
#[test]
fn unexpected_status_includes_cf_ray_and_request_id() {
let err = UnexpectedResponseError {
status: StatusCode::UNAUTHORIZED,
body: "plain text error".to_string(),
url: Some("https://chatgpt.com/backend-api/codex/responses".to_string()),
cf_ray: Some("9c81f9f18f2fa49d-LHR".to_string()),
request_id: Some("req-xyz".to_string()),
identity_authorization_error: None,
identity_error_code: None,
};
let status = StatusCode::UNAUTHORIZED.to_string();
assert_eq!(
err.to_string(),
format!(
"unexpected status {status}: plain text error, url: https://chatgpt.com/backend-api/codex/responses, cf-ray: 9c81f9f18f2fa49d-LHR, request id: req-xyz"
)
);
}
#[test]
fn unexpected_status_includes_identity_auth_details() {
let err = UnexpectedResponseError {
status: StatusCode::UNAUTHORIZED,
body: "plain text error".to_string(),
url: Some("https://chatgpt.com/backend-api/codex/models".to_string()),
cf_ray: Some("cf-ray-auth-401-test".to_string()),
request_id: Some("req-auth".to_string()),
identity_authorization_error: Some("missing_authorization_header".to_string()),
identity_error_code: Some("token_expired".to_string()),
};
let status = StatusCode::UNAUTHORIZED.to_string();
assert_eq!(
err.to_string(),
format!(
"unexpected status {status}: plain text error, url: https://chatgpt.com/backend-api/codex/models, cf-ray: cf-ray-auth-401-test, request id: req-auth, auth error: missing_authorization_header, auth error code: token_expired"
)
);
}
#[test]
fn usage_limit_reached_includes_hours_and_minutes() {
let base = Utc.with_ymd_and_hms(2024, 1, 1, 0, 0, 0).unwrap();
let resets_at = base + ChronoDuration::hours(3) + ChronoDuration::minutes(32);
with_now_override(base, move || {
let expected_time = format_retry_timestamp(&resets_at);
let err = UsageLimitReachedError {
plan_type: Some(PlanType::Known(KnownPlan::Plus)),
resets_at: Some(resets_at),
rate_limits: Some(Box::new(rate_limit_snapshot())),
promo_message: None,
};
let expected = format!(
"You've hit your usage limit. Upgrade to Pro (https://chatgpt.com/explore/pro), visit https://chatgpt.com/codex/settings/usage to purchase more credits or try again at {expected_time}."
);
assert_eq!(err.to_string(), expected);
});
}
#[test]
fn usage_limit_reached_includes_days_hours_minutes() {
let base = Utc.with_ymd_and_hms(2024, 1, 1, 0, 0, 0).unwrap();
let resets_at =
base + ChronoDuration::days(2) + ChronoDuration::hours(3) + ChronoDuration::minutes(5);
with_now_override(base, move || {
let expected_time = format_retry_timestamp(&resets_at);
let err = UsageLimitReachedError {
plan_type: None,
resets_at: Some(resets_at),
rate_limits: Some(Box::new(rate_limit_snapshot())),
promo_message: None,
};
let expected = format!("You've hit your usage limit. Try again at {expected_time}.");
assert_eq!(err.to_string(), expected);
});
}
#[test]
fn usage_limit_reached_less_than_minute() {
let base = Utc.with_ymd_and_hms(2024, 1, 1, 0, 0, 0).unwrap();
let resets_at = base + ChronoDuration::seconds(30);
with_now_override(base, move || {
let expected_time = format_retry_timestamp(&resets_at);
let err = UsageLimitReachedError {
plan_type: None,
resets_at: Some(resets_at),
rate_limits: Some(Box::new(rate_limit_snapshot())),
promo_message: None,
};
let expected = format!("You've hit your usage limit. Try again at {expected_time}.");
assert_eq!(err.to_string(), expected);
});
}
#[test]
fn usage_limit_reached_with_promo_message() {
let base = Utc.with_ymd_and_hms(2024, 1, 1, 0, 0, 0).unwrap();
let resets_at = base + ChronoDuration::seconds(30);
with_now_override(base, move || {
let expected_time = format_retry_timestamp(&resets_at);
let err = UsageLimitReachedError {
plan_type: None,
resets_at: Some(resets_at),
rate_limits: Some(Box::new(rate_limit_snapshot())),
promo_message: Some(
"To continue using Codex, start a free trial of <PLAN> today".to_string(),
),
};
let expected = format!(
"You've hit your usage limit. To continue using Codex, start a free trial of <PLAN> today, or try again at {expected_time}."
);
assert_eq!(err.to_string(), expected);
});
}

View File

@@ -0,0 +1,169 @@
//! Text encoding detection and conversion utilities for shell output.
//!
//! Windows users frequently run into code pages such as CP1251 or CP866 when invoking commands
//! through VS Code. Those bytes show up as invalid UTF-8 and used to be replaced with the standard
//! Unicode replacement character. We now lean on `chardetng` and `encoding_rs` so we can
//! automatically detect and decode the vast majority of legacy encodings before falling back to
//! lossy UTF-8 decoding.
use chardetng::EncodingDetector;
use encoding_rs::Encoding;
use encoding_rs::IBM866;
use encoding_rs::WINDOWS_1252;
use std::time::Duration;
#[derive(Debug, Clone)]
pub struct StreamOutput<T: Clone> {
pub text: T,
pub truncated_after_lines: Option<u32>,
}
impl StreamOutput<String> {
pub fn new(text: String) -> Self {
Self {
text,
truncated_after_lines: None,
}
}
}
impl StreamOutput<Vec<u8>> {
pub fn from_utf8_lossy(&self) -> StreamOutput<String> {
StreamOutput {
text: bytes_to_string_smart(&self.text),
truncated_after_lines: self.truncated_after_lines,
}
}
}
#[derive(Clone, Debug)]
pub struct ExecToolCallOutput {
pub exit_code: i32,
pub stdout: StreamOutput<String>,
pub stderr: StreamOutput<String>,
pub aggregated_output: StreamOutput<String>,
pub duration: Duration,
pub timed_out: bool,
}
impl Default for ExecToolCallOutput {
fn default() -> Self {
Self {
exit_code: 0,
stdout: StreamOutput::new(String::new()),
stderr: StreamOutput::new(String::new()),
aggregated_output: StreamOutput::new(String::new()),
duration: Duration::ZERO,
timed_out: false,
}
}
}
/// Attempts to convert arbitrary bytes to UTF-8 with best-effort encoding detection.
pub fn bytes_to_string_smart(bytes: &[u8]) -> String {
if bytes.is_empty() {
return String::new();
}
if let Ok(utf8_str) = std::str::from_utf8(bytes) {
return utf8_str.to_owned();
}
let encoding = detect_encoding(bytes);
decode_bytes(bytes, encoding)
}
// Windows-1252 reassigns a handful of 0x80-0x9F slots to smart punctuation (curly quotes, dashes,
// ™). CP866 uses those *same byte values* for uppercase Cyrillic letters. When chardetng sees shell
// snippets that mix these bytes with ASCII it sometimes guesses IBM866, so “smart quotes” render as
// Cyrillic garbage (“УФЦ”) in VS Code. However, CP866 uppercase tokens are perfectly valid output
// (e.g., `ПРИ test`) so we cannot flip every 0x80-0x9F byte to Windows-1252 either. The compromise
// is to only coerce IBM866 to Windows-1252 when (a) the high bytes are exclusively the punctuation
// values listed below and (b) we spot adjacent ASCII. This targets the real failure case without
// clobbering legitimate Cyrillic text. If another code page has a similar collision, introduce a
// dedicated allowlist (like this one) plus unit tests that capture the actual shell output we want
// to preserve. Windows-1252 byte values for smart punctuation.
const WINDOWS_1252_PUNCT_BYTES: [u8; 8] = [
0x91, // (left single quotation mark)
0x92, // (right single quotation mark)
0x93, // “ (left double quotation mark)
0x94, // ” (right double quotation mark)
0x95, // • (bullet)
0x96, // (en dash)
0x97, // — (em dash)
0x99, // ™ (trade mark sign)
];
fn detect_encoding(bytes: &[u8]) -> &'static Encoding {
let mut detector = EncodingDetector::new();
detector.feed(bytes, true);
let (encoding, _is_confident) = detector.guess_assess(None, true);
// chardetng occasionally reports IBM866 for short strings that only contain Windows-1252 “smart
// punctuation” bytes (0x80-0x9F) because that range maps to Cyrillic letters in IBM866. When
// those bytes show up alongside an ASCII word (typical shell output: `"“`test), we know the
// intent was likely CP1252 quotes/dashes. Prefer WINDOWS_1252 in that specific situation so we
// render the characters users expect instead of Cyrillic junk. References:
// - Windows-1252 reserving 0x80-0x9F for curly quotes/dashes:
// https://en.wikipedia.org/wiki/Windows-1252
// - CP866 mapping 0x93/0x94/0x96 to Cyrillic letters, so the same bytes show up as “УФЦ” when
// mis-decoded: https://www.unicode.org/Public/MAPPINGS/VENDORS/MICSFT/PC/CP866.TXT
if encoding == IBM866 && looks_like_windows_1252_punctuation(bytes) {
return WINDOWS_1252;
}
encoding
}
fn decode_bytes(bytes: &[u8], encoding: &'static Encoding) -> String {
let (decoded, _, had_errors) = encoding.decode(bytes);
if had_errors {
return String::from_utf8_lossy(bytes).into_owned();
}
decoded.into_owned()
}
/// Detect whether the byte stream looks like Windows-1252 “smart punctuation” wrapped around
/// otherwise-ASCII text.
///
/// Context: IBM866 and Windows-1252 share the 0x80-0x9F slot range. In IBM866 these bytes decode to
/// Cyrillic letters, whereas Windows-1252 maps them to curly quotes and dashes. chardetng can guess
/// IBM866 for short snippets that only contain those bytes, which turns shell output such as
/// `“test”` into unreadable Cyrillic. To avoid that, we treat inputs comprising a handful of bytes
/// from the problematic range plus ASCII letters as CP1252 punctuation. We deliberately do *not*
/// cap how many of those punctuation bytes we accept: VS Code frequently prints several quoted
/// phrases (e.g., `"foo" - "bar"`), and truncating the count would once again mis-decode those as
/// Cyrillic. If we discover additional encodings with overlapping byte ranges, prefer adding
/// encoding-specific byte allowlists like `WINDOWS_1252_PUNCT` and tests that exercise real-world
/// shell snippets.
fn looks_like_windows_1252_punctuation(bytes: &[u8]) -> bool {
let mut saw_extended_punctuation = false;
let mut saw_ascii_word = false;
for &byte in bytes {
if byte >= 0xA0 {
return false;
}
if (0x80..=0x9F).contains(&byte) {
if !is_windows_1252_punct(byte) {
return false;
}
saw_extended_punctuation = true;
}
if byte.is_ascii_alphabetic() {
saw_ascii_word = true;
}
}
saw_extended_punctuation && saw_ascii_word
}
fn is_windows_1252_punct(byte: u8) -> bool {
WINDOWS_1252_PUNCT_BYTES.contains(&byte)
}
#[cfg(test)]
#[path = "exec_output_tests.rs"]
mod tests;

View File

@@ -0,0 +1,77 @@
//! Integration test for the text encoding fix for issue #6178.
//!
//! These tests simulate VSCode's shell preview on Windows/WSL where the output
//! may be encoded with a legacy code page before it reaches Codex.
use super::StreamOutput;
use pretty_assertions::assert_eq;
#[test]
fn test_utf8_shell_output() {
// Baseline: UTF-8 output should bypass the detector and remain unchanged.
assert_eq!(decode_shell_output("пример".as_bytes()), "пример");
}
#[test]
fn test_cp1251_shell_output() {
// VS Code shells on Windows frequently surface CP1251 bytes for Cyrillic text.
assert_eq!(decode_shell_output(b"\xEF\xF0\xE8\xEC\xE5\xF0"), "пример");
}
#[test]
fn test_cp866_shell_output() {
// Native cmd.exe still defaults to CP866; make sure we recognize that too.
assert_eq!(decode_shell_output(b"\xAF\xE0\xA8\xAC\xA5\xE0"), "пример");
}
#[test]
fn test_windows_1252_smart_decoding() {
// Smart detection should turn fancy quotes/dashes into the proper Unicode glyphs.
assert_eq!(
decode_shell_output(b"\x93\x94 test \x96 dash"),
"\u{201C}\u{201D} test \u{2013} dash"
);
}
#[test]
fn test_smart_decoding_improves_over_lossy_utf8() {
// Regression guard: String::from_utf8_lossy() alone used to emit replacement chars here.
let bytes = b"\x93\x94 test \x96 dash";
assert!(
String::from_utf8_lossy(bytes).contains('\u{FFFD}'),
"lossy UTF-8 should inject replacement chars"
);
assert_eq!(
decode_shell_output(bytes),
"\u{201C}\u{201D} test \u{2013} dash",
"smart decoding should keep curly quotes intact"
);
}
#[test]
fn test_mixed_ascii_and_legacy_encoding() {
// Commands tend to mix ASCII status text with Latin-1 bytes (e.g. café).
assert_eq!(decode_shell_output(b"Output: caf\xE9"), "Output: café"); // codespell:ignore caf
}
#[test]
fn test_pure_latin1_shell_output() {
// Latin-1 by itself should still decode correctly (regression coverage for the older tests).
assert_eq!(decode_shell_output(b"caf\xE9"), "café"); // codespell:ignore caf
}
#[test]
fn test_invalid_bytes_still_fall_back_to_lossy() {
// If detection fails, we still want the user to see replacement characters.
let bytes = b"\xFF\xFE\xFD";
assert_eq!(decode_shell_output(bytes), String::from_utf8_lossy(bytes));
}
fn decode_shell_output(bytes: &[u8]) -> String {
StreamOutput {
text: bytes.to_vec(),
truncated_after_lines: None,
}
.from_utf8_lossy()
.text
}

View File

@@ -1,16 +1,20 @@
pub mod account;
mod agent_path;
pub mod auth;
mod thread_id;
pub use agent_path::AgentPath;
pub use thread_id::ThreadId;
pub mod approvals;
pub mod config_types;
pub mod dynamic_tools;
pub mod error;
pub mod exec_output;
pub mod items;
pub mod mcp;
pub mod memory_citation;
pub mod message_history;
pub mod models;
pub mod network_policy;
pub mod num_format;
pub mod openai_models;
pub mod parse_command;

View File

@@ -0,0 +1,22 @@
use crate::approvals::NetworkApprovalProtocol;
use codex_network_proxy::NetworkDecisionSource;
use codex_network_proxy::NetworkPolicyDecision;
use serde::Deserialize;
#[derive(Debug, Clone, Deserialize, PartialEq, Eq)]
#[serde(rename_all = "camelCase")]
pub struct NetworkPolicyDecisionPayload {
pub decision: NetworkPolicyDecision,
pub source: NetworkDecisionSource,
#[serde(default)]
pub protocol: Option<NetworkApprovalProtocol>,
pub host: Option<String>,
pub reason: Option<String>,
pub port: Option<u16>,
}
impl NetworkPolicyDecisionPayload {
pub fn is_ask_from_decider(&self) -> bool {
self.decision == NetworkPolicyDecision::Ask && self.source == NetworkDecisionSource::Decider
}
}