Enforce the Responses API 50 MB image limit

git-stack-id: fjord/original_image_res---4hw7wvctsczel1
git-stack-title: Enforce the Responses API 50 MB image limit
This commit is contained in:
Curtis 'Fjord' Hawthorne
2026-02-27 11:40:38 -08:00
parent 52521a5e40
commit 353d49014e
2 changed files with 311 additions and 3 deletions

View File

@@ -497,7 +497,7 @@ impl ModelClientSession {
service_tier: Option<ServiceTier>,
) -> Result<ResponsesApiRequest> {
let instructions = &prompt.base_instructions.text;
let input = prompt.get_formatted_input();
let input = prompt.get_formatted_input()?;
let tools = create_tools_json_for_responses_api(&prompt.tools)?;
let default_reasoning_effort = model_info.default_reasoning_level;
let reasoning = if model_info.supports_reasoning_summaries {

View File

@@ -1,9 +1,14 @@
use crate::client_common::tools::ToolSpec;
use crate::config::types::Personality;
use crate::error::CodexErr;
use crate::error::Result;
use base64::Engine as _;
use base64::prelude::BASE64_STANDARD;
pub use codex_api::common::ResponseEvent;
use codex_protocol::models::BaseInstructions;
use codex_protocol::models::ContentItem;
use codex_protocol::models::FunctionCallOutputBody;
use codex_protocol::models::FunctionCallOutputContentItem;
use codex_protocol::models::ResponseItem;
use futures::Stream;
use serde::Deserialize;
@@ -22,6 +27,12 @@ pub const REVIEW_EXIT_SUCCESS_TMPL: &str = include_str!("../templates/review/exi
pub const REVIEW_EXIT_INTERRUPTED_TMPL: &str =
include_str!("../templates/review/exit_interrupted.xml");
// See the Responses API image input size limits in the Images and Vision guide:
// https://platform.openai.com/docs/guides/images-vision?api-mode=responses&format=file
const RESPONSES_API_MAX_INLINE_IMAGE_BYTES: usize = 50_000_000;
// Human-readable label used in user-facing error messages for the limit above.
const RESPONSES_API_MAX_INLINE_IMAGE_BYTES_LABEL: &str = "50 MB";
// Text substituted in place of a tool-returned inline image that was dropped
// to keep a request under the total image budget (newest images drop first).
const INLINE_TOOL_IMAGE_OMITTED_PLACEHOLDER: &str = "Codex omitted this tool-returned image because the current request would exceed the Responses API 50 MB total image limit. Request fewer images at a time or inspect them in smaller batches.";
/// API request payload for a single model turn
#[derive(Default, Debug, Clone)]
pub struct Prompt {
@@ -45,7 +56,7 @@ pub struct Prompt {
}
impl Prompt {
pub(crate) fn get_formatted_input(&self) -> Vec<ResponseItem> {
pub(crate) fn get_formatted_input(&self) -> Result<Vec<ResponseItem>> {
let mut input = self.input.clone();
// when using the *Freeform* apply_patch tool specifically, tool outputs
@@ -60,7 +71,156 @@ impl Prompt {
reserialize_shell_outputs(&mut input);
}
input
enforce_inline_image_request_budget(&mut input, RESPONSES_API_MAX_INLINE_IMAGE_BYTES)?;
Ok(input)
}
}
/// Ensures the inline (base64 data-URL) images in `items` fit within
/// `max_inline_image_bytes` for a single request.
///
/// Model-generated tool-output images are replaced with a textual
/// placeholder, newest first, until the request fits. User-attached message
/// images are never removed; if the total still exceeds the budget after all
/// tool images have been omitted, an `InvalidRequest` error is returned and
/// any placeholder substitutions already made are left in place.
fn enforce_inline_image_request_budget(
    items: &mut [ResponseItem],
    max_inline_image_bytes: usize,
) -> Result<()> {
    let mut inline_image_bytes = total_inline_image_bytes(items);
    if inline_image_bytes <= max_inline_image_bytes {
        return Ok(());
    }
    let mut omitted_model_generated_image = false;
    // Walk newest-to-oldest so the most recently returned tool images are
    // dropped first, preserving older context the model may still rely on.
    'items: for item in items.iter_mut().rev() {
        let Some(content_items) = tool_output_content_items_mut(item) else {
            continue;
        };
        for content_item in content_items.iter_mut().rev() {
            let FunctionCallOutputContentItem::InputImage { image_url, .. } = content_item else {
                continue;
            };
            let Some(image_bytes) = inline_image_data_url_bytes(image_url) else {
                continue;
            };
            *content_item = FunctionCallOutputContentItem::InputText {
                text: INLINE_TOOL_IMAGE_OMITTED_PLACEHOLDER.to_string(),
            };
            inline_image_bytes = inline_image_bytes.saturating_sub(image_bytes);
            omitted_model_generated_image = true;
            if inline_image_bytes <= max_inline_image_bytes {
                break 'items;
            }
        }
    }
    // Fix: the original only checked the budget at the top of each loop
    // iteration, so when the final omission happened on the last iterated
    // item/content entry the loops exited and the function erroneously
    // returned an error even though the request now fit. Re-check here.
    if inline_image_bytes <= max_inline_image_bytes {
        return Ok(());
    }
    Err(CodexErr::InvalidRequest(
        inline_image_request_budget_exceeded_message(
            inline_image_bytes,
            max_inline_image_bytes,
            omitted_model_generated_image,
        ),
    ))
}
fn total_inline_image_bytes(items: &[ResponseItem]) -> usize {
items
.iter()
.map(response_item_inline_image_bytes)
.sum::<usize>()
}
/// Decoded byte size of the inline (base64 data-URL) images carried by one
/// response item; all non-image content contributes zero.
fn response_item_inline_image_bytes(item: &ResponseItem) -> usize {
    match item {
        // User/assistant messages may carry inline input images directly.
        ResponseItem::Message { content, .. } => {
            let mut total = 0usize;
            for entry in content {
                if let ContentItem::InputImage { image_url } = entry {
                    total += inline_image_data_url_bytes(image_url).unwrap_or(0);
                }
            }
            total
        }
        // Tool-call outputs may carry inline images in structured content.
        ResponseItem::FunctionCallOutput { output, .. }
        | ResponseItem::CustomToolCallOutput { output, .. } => {
            output.content_items().map_or(0, |entries| {
                entries
                    .iter()
                    .filter_map(|entry| match entry {
                        FunctionCallOutputContentItem::InputImage { image_url, .. } => {
                            inline_image_data_url_bytes(image_url)
                        }
                        FunctionCallOutputContentItem::InputText { .. } => None,
                    })
                    .sum::<usize>()
            })
        }
        // Every other item kind carries no inline images.
        _ => 0,
    }
}
/// Borrows the mutable structured content of a tool-call output item, or
/// `None` for item kinds that have no tool-output payload.
fn tool_output_content_items_mut(
    item: &mut ResponseItem,
) -> Option<&mut Vec<FunctionCallOutputContentItem>> {
    if let ResponseItem::FunctionCallOutput { output, .. }
    | ResponseItem::CustomToolCallOutput { output, .. } = item
    {
        output.content_items_mut()
    } else {
        None
    }
}
/// Decoded byte length of a base64 `data:image/...` URL, or `None` when the
/// URL is not a valid inline base64 image (remote URL, other mime type, or
/// malformed base64 payload).
fn inline_image_data_url_bytes(url: &str) -> Option<usize> {
    let encoded = parse_base64_image_data_url(url)?;
    let decoded = BASE64_STANDARD.decode(encoded).ok()?;
    Some(decoded.len())
}
/// Extracts the base64 payload of an image data URL.
///
/// Returns `Some(payload)` only when the URL has a case-insensitive `data:`
/// scheme, an `image/*` mime type, and a `base64` attribute in its metadata
/// (per RFC 2397: `data:<mime>[;attr]*;base64,<payload>`). Anything else —
/// remote URLs, non-image mime types, or non-base64 encodings — yields `None`.
fn parse_base64_image_data_url(url: &str) -> Option<&str> {
    let scheme = "data:";
    // `get` (rather than indexing) also rejects URLs shorter than the scheme.
    let leading = url.get(..scheme.len())?;
    if !leading.eq_ignore_ascii_case(scheme) {
        return None;
    }
    // RFC 2397 reserves the first comma as the metadata/payload separator.
    let (metadata, payload) = url.split_once(',')?;
    let attributes = &metadata[scheme.len()..];
    let mut parts = attributes.split(';');
    let mime_type = parts.next().unwrap_or_default();
    let is_image = mime_type
        .get(.."image/".len())
        .is_some_and(|prefix| prefix.eq_ignore_ascii_case("image/"));
    let is_base64 = parts.any(|part| part.eq_ignore_ascii_case("base64"));
    (is_image && is_base64).then_some(payload)
}
fn inline_image_request_budget_exceeded_message(
inline_image_bytes: usize,
max_inline_image_bytes: usize,
omitted_model_generated_image: bool,
) -> String {
let limit_label = if max_inline_image_bytes == RESPONSES_API_MAX_INLINE_IMAGE_BYTES {
RESPONSES_API_MAX_INLINE_IMAGE_BYTES_LABEL.to_string()
} else {
format!("{max_inline_image_bytes} bytes")
};
if omitted_model_generated_image {
format!(
"Codex could not send this turn because inline images still total {inline_image_bytes} bytes after omitting all model-generated tool images, exceeding the Responses API {limit_label} total image limit for a single request. Remove some attached images or start a new thread without earlier image attachments."
)
} else {
format!(
"Codex could not send this turn because inline images total {inline_image_bytes} bytes, exceeding the Responses API {limit_label} total image limit for a single request. Remove some attached images or start a new thread without earlier image attachments."
)
}
}
@@ -230,10 +390,14 @@ impl Stream for ResponseStream {
#[cfg(test)]
mod tests {
use base64::Engine as _;
use base64::prelude::BASE64_STANDARD;
use codex_api::ResponsesApiRequest;
use codex_api::common::OpenAiVerbosity;
use codex_api::common::TextControls;
use codex_api::create_text_param_for_request;
use codex_protocol::models::ContentItem;
use codex_protocol::models::FunctionCallOutputContentItem;
use codex_protocol::models::FunctionCallOutputPayload;
use pretty_assertions::assert_eq;
@@ -399,4 +563,148 @@ mod tests {
]
);
}
#[test]
// Three items carry 4 decoded bytes of inline image each (12 total) against
// an 8-byte budget. Only the NEWEST tool-output image (the trailing
// CustomToolCallOutput) should be replaced with the placeholder text; the
// user message image and the older FunctionCallOutput image must survive.
fn rewrites_newest_tool_images_until_request_is_within_budget() {
let mut items = vec![
ResponseItem::Message {
id: None,
role: "user".to_string(),
content: vec![ContentItem::InputImage {
image_url: image_data_url(&[1, 2, 3, 4]),
}],
end_turn: None,
phase: None,
},
ResponseItem::FunctionCallOutput {
call_id: "call-1".to_string(),
output: FunctionCallOutputPayload::from_content_items(vec![
FunctionCallOutputContentItem::InputImage {
image_url: image_data_url(&[5, 6, 7, 8]),
detail: None,
},
]),
},
ResponseItem::CustomToolCallOutput {
call_id: "call-2".to_string(),
output: FunctionCallOutputPayload::from_content_items(vec![
FunctionCallOutputContentItem::InputImage {
image_url: image_data_url(&[9, 10, 11, 12]),
detail: None,
},
]),
},
];
enforce_inline_image_request_budget(&mut items, 8).expect("request should fit");
assert_eq!(
items,
vec![
ResponseItem::Message {
id: None,
role: "user".to_string(),
content: vec![ContentItem::InputImage {
image_url: image_data_url(&[1, 2, 3, 4]),
}],
end_turn: None,
phase: None,
},
ResponseItem::FunctionCallOutput {
call_id: "call-1".to_string(),
output: FunctionCallOutputPayload::from_content_items(vec![
FunctionCallOutputContentItem::InputImage {
image_url: image_data_url(&[5, 6, 7, 8]),
detail: None,
},
]),
},
// The newest tool image was swapped for the placeholder text.
ResponseItem::CustomToolCallOutput {
call_id: "call-2".to_string(),
output: FunctionCallOutputPayload::from_content_items(vec![
FunctionCallOutputContentItem::InputText {
text: INLINE_TOOL_IMAGE_OMITTED_PLACEHOLDER.to_string(),
},
]),
},
]
);
}
#[test]
// User-attached message images are never dropped: when they alone exceed the
// budget (4 bytes vs a 3-byte limit) the turn fails with the error variant
// that does NOT mention omitted tool images.
fn errors_when_user_images_still_exceed_request_budget() {
let mut items = vec![ResponseItem::Message {
id: None,
role: "user".to_string(),
content: vec![ContentItem::InputImage {
image_url: image_data_url(&[1, 2, 3, 4]),
}],
end_turn: None,
phase: None,
}];
let err = enforce_inline_image_request_budget(&mut items, 3).expect_err("should fail");
assert_eq!(
err.to_string(),
"Codex could not send this turn because inline images total 4 bytes, exceeding the Responses API 3 bytes total image limit for a single request. Remove some attached images or start a new thread without earlier image attachments."
);
}
#[test]
// Even after every tool image has been replaced with the placeholder, the
// remaining user-attached image (4 bytes) still exceeds the 3-byte budget,
// so the turn fails with the "still total ... after omitting" error variant.
// The placeholder substitution is left in place on the tool output.
fn errors_after_omitting_tool_images_if_user_images_still_exceed_budget() {
let mut items = vec![
ResponseItem::Message {
id: None,
role: "user".to_string(),
content: vec![ContentItem::InputImage {
image_url: image_data_url(&[1, 2, 3, 4]),
}],
end_turn: None,
phase: None,
},
ResponseItem::FunctionCallOutput {
call_id: "call-1".to_string(),
output: FunctionCallOutputPayload::from_content_items(vec![
FunctionCallOutputContentItem::InputImage {
image_url: image_data_url(&[5, 6, 7, 8]),
detail: None,
},
]),
},
];
let err = enforce_inline_image_request_budget(&mut items, 3).expect_err("should fail");
assert_eq!(
err.to_string(),
"Codex could not send this turn because inline images still total 4 bytes after omitting all model-generated tool images, exceeding the Responses API 3 bytes total image limit for a single request. Remove some attached images or start a new thread without earlier image attachments."
);
assert_eq!(
items,
vec![
ResponseItem::Message {
id: None,
role: "user".to_string(),
content: vec![ContentItem::InputImage {
image_url: image_data_url(&[1, 2, 3, 4]),
}],
end_turn: None,
phase: None,
},
// The tool image was still replaced, even though the turn failed.
ResponseItem::FunctionCallOutput {
call_id: "call-1".to_string(),
output: FunctionCallOutputPayload::from_content_items(vec![
FunctionCallOutputContentItem::InputText {
text: INLINE_TOOL_IMAGE_OMITTED_PLACEHOLDER.to_string(),
},
]),
},
]
);
}
/// Builds a base64 PNG data URL from raw bytes for use as a test fixture.
fn image_data_url(bytes: &[u8]) -> String {
    let encoded = BASE64_STANDARD.encode(bytes);
    format!("data:image/png;base64,{encoded}")
}
}