mirror of
https://github.com/openai/codex.git
synced 2026-03-05 21:45:28 +03:00
Enforce the Responses API 50 MB image limit
git-stack-id: fjord/original_image_res---4hw7wvctsczel1 git-stack-title: Enforce the Responses API 50 MB image limit
This commit is contained in:
@@ -497,7 +497,7 @@ impl ModelClientSession {
|
||||
service_tier: Option<ServiceTier>,
|
||||
) -> Result<ResponsesApiRequest> {
|
||||
let instructions = &prompt.base_instructions.text;
|
||||
let input = prompt.get_formatted_input();
|
||||
let input = prompt.get_formatted_input()?;
|
||||
let tools = create_tools_json_for_responses_api(&prompt.tools)?;
|
||||
let default_reasoning_effort = model_info.default_reasoning_level;
|
||||
let reasoning = if model_info.supports_reasoning_summaries {
|
||||
|
||||
@@ -1,9 +1,14 @@
|
||||
use crate::client_common::tools::ToolSpec;
|
||||
use crate::config::types::Personality;
|
||||
use crate::error::CodexErr;
|
||||
use crate::error::Result;
|
||||
use base64::Engine as _;
|
||||
use base64::prelude::BASE64_STANDARD;
|
||||
pub use codex_api::common::ResponseEvent;
|
||||
use codex_protocol::models::BaseInstructions;
|
||||
use codex_protocol::models::ContentItem;
|
||||
use codex_protocol::models::FunctionCallOutputBody;
|
||||
use codex_protocol::models::FunctionCallOutputContentItem;
|
||||
use codex_protocol::models::ResponseItem;
|
||||
use futures::Stream;
|
||||
use serde::Deserialize;
|
||||
@@ -22,6 +27,12 @@ pub const REVIEW_EXIT_SUCCESS_TMPL: &str = include_str!("../templates/review/exi
|
||||
pub const REVIEW_EXIT_INTERRUPTED_TMPL: &str =
|
||||
include_str!("../templates/review/exit_interrupted.xml");
|
||||
|
||||
// See the Responses API image input size limits in the Images and Vision guide:
|
||||
// https://platform.openai.com/docs/guides/images-vision?api-mode=responses&format=file
|
||||
const RESPONSES_API_MAX_INLINE_IMAGE_BYTES: usize = 50_000_000;
|
||||
const RESPONSES_API_MAX_INLINE_IMAGE_BYTES_LABEL: &str = "50 MB";
|
||||
const INLINE_TOOL_IMAGE_OMITTED_PLACEHOLDER: &str = "Codex omitted this tool-returned image because the current request would exceed the Responses API 50 MB total image limit. Request fewer images at a time or inspect them in smaller batches.";
|
||||
|
||||
/// API request payload for a single model turn
|
||||
#[derive(Default, Debug, Clone)]
|
||||
pub struct Prompt {
|
||||
@@ -45,7 +56,7 @@ pub struct Prompt {
|
||||
}
|
||||
|
||||
impl Prompt {
|
||||
pub(crate) fn get_formatted_input(&self) -> Vec<ResponseItem> {
|
||||
pub(crate) fn get_formatted_input(&self) -> Result<Vec<ResponseItem>> {
|
||||
let mut input = self.input.clone();
|
||||
|
||||
// when using the *Freeform* apply_patch tool specifically, tool outputs
|
||||
@@ -60,7 +71,156 @@ impl Prompt {
|
||||
reserialize_shell_outputs(&mut input);
|
||||
}
|
||||
|
||||
input
|
||||
enforce_inline_image_request_budget(&mut input, RESPONSES_API_MAX_INLINE_IMAGE_BYTES)?;
|
||||
|
||||
Ok(input)
|
||||
}
|
||||
}
|
||||
|
||||
fn enforce_inline_image_request_budget(
|
||||
items: &mut [ResponseItem],
|
||||
max_inline_image_bytes: usize,
|
||||
) -> Result<()> {
|
||||
let mut inline_image_bytes = total_inline_image_bytes(items);
|
||||
let mut omitted_model_generated_image = false;
|
||||
|
||||
if inline_image_bytes <= max_inline_image_bytes {
|
||||
return Ok(());
|
||||
}
|
||||
|
||||
for item in items.iter_mut().rev() {
|
||||
if inline_image_bytes <= max_inline_image_bytes {
|
||||
return Ok(());
|
||||
}
|
||||
|
||||
let Some(content_items) = tool_output_content_items_mut(item) else {
|
||||
continue;
|
||||
};
|
||||
|
||||
for content_item in content_items.iter_mut().rev() {
|
||||
if inline_image_bytes <= max_inline_image_bytes {
|
||||
return Ok(());
|
||||
}
|
||||
|
||||
let FunctionCallOutputContentItem::InputImage { image_url, .. } = content_item else {
|
||||
continue;
|
||||
};
|
||||
let Some(image_bytes) = inline_image_data_url_bytes(image_url) else {
|
||||
continue;
|
||||
};
|
||||
|
||||
*content_item = FunctionCallOutputContentItem::InputText {
|
||||
text: INLINE_TOOL_IMAGE_OMITTED_PLACEHOLDER.to_string(),
|
||||
};
|
||||
inline_image_bytes = inline_image_bytes.saturating_sub(image_bytes);
|
||||
omitted_model_generated_image = true;
|
||||
}
|
||||
}
|
||||
|
||||
Err(CodexErr::InvalidRequest(
|
||||
inline_image_request_budget_exceeded_message(
|
||||
inline_image_bytes,
|
||||
max_inline_image_bytes,
|
||||
omitted_model_generated_image,
|
||||
),
|
||||
))
|
||||
}
|
||||
|
||||
fn total_inline_image_bytes(items: &[ResponseItem]) -> usize {
|
||||
items
|
||||
.iter()
|
||||
.map(response_item_inline_image_bytes)
|
||||
.sum::<usize>()
|
||||
}
|
||||
|
||||
fn response_item_inline_image_bytes(item: &ResponseItem) -> usize {
|
||||
match item {
|
||||
ResponseItem::Message { content, .. } => content
|
||||
.iter()
|
||||
.filter_map(|content_item| match content_item {
|
||||
ContentItem::InputImage { image_url } => inline_image_data_url_bytes(image_url),
|
||||
ContentItem::InputText { .. } | ContentItem::OutputText { .. } => None,
|
||||
})
|
||||
.sum::<usize>(),
|
||||
ResponseItem::FunctionCallOutput { output, .. }
|
||||
| ResponseItem::CustomToolCallOutput { output, .. } => output
|
||||
.content_items()
|
||||
.map(|content_items| {
|
||||
content_items
|
||||
.iter()
|
||||
.filter_map(|content_item| match content_item {
|
||||
FunctionCallOutputContentItem::InputImage { image_url, .. } => {
|
||||
inline_image_data_url_bytes(image_url)
|
||||
}
|
||||
FunctionCallOutputContentItem::InputText { .. } => None,
|
||||
})
|
||||
.sum::<usize>()
|
||||
})
|
||||
.unwrap_or_default(),
|
||||
_ => 0,
|
||||
}
|
||||
}
|
||||
|
||||
fn tool_output_content_items_mut(
|
||||
item: &mut ResponseItem,
|
||||
) -> Option<&mut Vec<FunctionCallOutputContentItem>> {
|
||||
match item {
|
||||
ResponseItem::FunctionCallOutput { output, .. }
|
||||
| ResponseItem::CustomToolCallOutput { output, .. } => output.content_items_mut(),
|
||||
_ => None,
|
||||
}
|
||||
}
|
||||
|
||||
fn inline_image_data_url_bytes(url: &str) -> Option<usize> {
|
||||
let payload = parse_base64_image_data_url(url)?;
|
||||
Some(BASE64_STANDARD.decode(payload).ok()?.len())
|
||||
}
|
||||
|
||||
fn parse_base64_image_data_url(url: &str) -> Option<&str> {
|
||||
if !url
|
||||
.get(.."data:".len())
|
||||
.is_some_and(|prefix| prefix.eq_ignore_ascii_case("data:"))
|
||||
{
|
||||
return None;
|
||||
}
|
||||
let comma_index = url.find(',')?;
|
||||
let metadata = &url[..comma_index];
|
||||
let payload = &url[comma_index + 1..];
|
||||
let metadata_without_scheme = &metadata["data:".len()..];
|
||||
let mut metadata_parts = metadata_without_scheme.split(';');
|
||||
let mime_type = metadata_parts.next().unwrap_or_default();
|
||||
let has_base64_marker = metadata_parts.any(|part| part.eq_ignore_ascii_case("base64"));
|
||||
if !mime_type
|
||||
.get(.."image/".len())
|
||||
.is_some_and(|prefix| prefix.eq_ignore_ascii_case("image/"))
|
||||
{
|
||||
return None;
|
||||
}
|
||||
if !has_base64_marker {
|
||||
return None;
|
||||
}
|
||||
Some(payload)
|
||||
}
|
||||
|
||||
fn inline_image_request_budget_exceeded_message(
|
||||
inline_image_bytes: usize,
|
||||
max_inline_image_bytes: usize,
|
||||
omitted_model_generated_image: bool,
|
||||
) -> String {
|
||||
let limit_label = if max_inline_image_bytes == RESPONSES_API_MAX_INLINE_IMAGE_BYTES {
|
||||
RESPONSES_API_MAX_INLINE_IMAGE_BYTES_LABEL.to_string()
|
||||
} else {
|
||||
format!("{max_inline_image_bytes} bytes")
|
||||
};
|
||||
|
||||
if omitted_model_generated_image {
|
||||
format!(
|
||||
"Codex could not send this turn because inline images still total {inline_image_bytes} bytes after omitting all model-generated tool images, exceeding the Responses API {limit_label} total image limit for a single request. Remove some attached images or start a new thread without earlier image attachments."
|
||||
)
|
||||
} else {
|
||||
format!(
|
||||
"Codex could not send this turn because inline images total {inline_image_bytes} bytes, exceeding the Responses API {limit_label} total image limit for a single request. Remove some attached images or start a new thread without earlier image attachments."
|
||||
)
|
||||
}
|
||||
}
|
||||
|
||||
@@ -230,10 +390,14 @@ impl Stream for ResponseStream {
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use base64::Engine as _;
|
||||
use base64::prelude::BASE64_STANDARD;
|
||||
use codex_api::ResponsesApiRequest;
|
||||
use codex_api::common::OpenAiVerbosity;
|
||||
use codex_api::common::TextControls;
|
||||
use codex_api::create_text_param_for_request;
|
||||
use codex_protocol::models::ContentItem;
|
||||
use codex_protocol::models::FunctionCallOutputContentItem;
|
||||
use codex_protocol::models::FunctionCallOutputPayload;
|
||||
use pretty_assertions::assert_eq;
|
||||
|
||||
@@ -399,4 +563,148 @@ mod tests {
|
||||
]
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn rewrites_newest_tool_images_until_request_is_within_budget() {
|
||||
let mut items = vec![
|
||||
ResponseItem::Message {
|
||||
id: None,
|
||||
role: "user".to_string(),
|
||||
content: vec![ContentItem::InputImage {
|
||||
image_url: image_data_url(&[1, 2, 3, 4]),
|
||||
}],
|
||||
end_turn: None,
|
||||
phase: None,
|
||||
},
|
||||
ResponseItem::FunctionCallOutput {
|
||||
call_id: "call-1".to_string(),
|
||||
output: FunctionCallOutputPayload::from_content_items(vec![
|
||||
FunctionCallOutputContentItem::InputImage {
|
||||
image_url: image_data_url(&[5, 6, 7, 8]),
|
||||
detail: None,
|
||||
},
|
||||
]),
|
||||
},
|
||||
ResponseItem::CustomToolCallOutput {
|
||||
call_id: "call-2".to_string(),
|
||||
output: FunctionCallOutputPayload::from_content_items(vec![
|
||||
FunctionCallOutputContentItem::InputImage {
|
||||
image_url: image_data_url(&[9, 10, 11, 12]),
|
||||
detail: None,
|
||||
},
|
||||
]),
|
||||
},
|
||||
];
|
||||
|
||||
enforce_inline_image_request_budget(&mut items, 8).expect("request should fit");
|
||||
|
||||
assert_eq!(
|
||||
items,
|
||||
vec![
|
||||
ResponseItem::Message {
|
||||
id: None,
|
||||
role: "user".to_string(),
|
||||
content: vec![ContentItem::InputImage {
|
||||
image_url: image_data_url(&[1, 2, 3, 4]),
|
||||
}],
|
||||
end_turn: None,
|
||||
phase: None,
|
||||
},
|
||||
ResponseItem::FunctionCallOutput {
|
||||
call_id: "call-1".to_string(),
|
||||
output: FunctionCallOutputPayload::from_content_items(vec![
|
||||
FunctionCallOutputContentItem::InputImage {
|
||||
image_url: image_data_url(&[5, 6, 7, 8]),
|
||||
detail: None,
|
||||
},
|
||||
]),
|
||||
},
|
||||
ResponseItem::CustomToolCallOutput {
|
||||
call_id: "call-2".to_string(),
|
||||
output: FunctionCallOutputPayload::from_content_items(vec![
|
||||
FunctionCallOutputContentItem::InputText {
|
||||
text: INLINE_TOOL_IMAGE_OMITTED_PLACEHOLDER.to_string(),
|
||||
},
|
||||
]),
|
||||
},
|
||||
]
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn errors_when_user_images_still_exceed_request_budget() {
|
||||
let mut items = vec![ResponseItem::Message {
|
||||
id: None,
|
||||
role: "user".to_string(),
|
||||
content: vec![ContentItem::InputImage {
|
||||
image_url: image_data_url(&[1, 2, 3, 4]),
|
||||
}],
|
||||
end_turn: None,
|
||||
phase: None,
|
||||
}];
|
||||
|
||||
let err = enforce_inline_image_request_budget(&mut items, 3).expect_err("should fail");
|
||||
|
||||
assert_eq!(
|
||||
err.to_string(),
|
||||
"Codex could not send this turn because inline images total 4 bytes, exceeding the Responses API 3 bytes total image limit for a single request. Remove some attached images or start a new thread without earlier image attachments."
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn errors_after_omitting_tool_images_if_user_images_still_exceed_budget() {
|
||||
let mut items = vec![
|
||||
ResponseItem::Message {
|
||||
id: None,
|
||||
role: "user".to_string(),
|
||||
content: vec![ContentItem::InputImage {
|
||||
image_url: image_data_url(&[1, 2, 3, 4]),
|
||||
}],
|
||||
end_turn: None,
|
||||
phase: None,
|
||||
},
|
||||
ResponseItem::FunctionCallOutput {
|
||||
call_id: "call-1".to_string(),
|
||||
output: FunctionCallOutputPayload::from_content_items(vec![
|
||||
FunctionCallOutputContentItem::InputImage {
|
||||
image_url: image_data_url(&[5, 6, 7, 8]),
|
||||
detail: None,
|
||||
},
|
||||
]),
|
||||
},
|
||||
];
|
||||
|
||||
let err = enforce_inline_image_request_budget(&mut items, 3).expect_err("should fail");
|
||||
|
||||
assert_eq!(
|
||||
err.to_string(),
|
||||
"Codex could not send this turn because inline images still total 4 bytes after omitting all model-generated tool images, exceeding the Responses API 3 bytes total image limit for a single request. Remove some attached images or start a new thread without earlier image attachments."
|
||||
);
|
||||
assert_eq!(
|
||||
items,
|
||||
vec![
|
||||
ResponseItem::Message {
|
||||
id: None,
|
||||
role: "user".to_string(),
|
||||
content: vec![ContentItem::InputImage {
|
||||
image_url: image_data_url(&[1, 2, 3, 4]),
|
||||
}],
|
||||
end_turn: None,
|
||||
phase: None,
|
||||
},
|
||||
ResponseItem::FunctionCallOutput {
|
||||
call_id: "call-1".to_string(),
|
||||
output: FunctionCallOutputPayload::from_content_items(vec![
|
||||
FunctionCallOutputContentItem::InputText {
|
||||
text: INLINE_TOOL_IMAGE_OMITTED_PLACEHOLDER.to_string(),
|
||||
},
|
||||
]),
|
||||
},
|
||||
]
|
||||
);
|
||||
}
|
||||
|
||||
fn image_data_url(bytes: &[u8]) -> String {
|
||||
format!("data:image/png;base64,{}", BASE64_STANDARD.encode(bytes))
|
||||
}
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user