mirror of
https://github.com/openai/codex.git
synced 2026-04-28 02:11:08 +03:00
Enforce the Responses API 50 MB image limit
git-stack-id: fjord/original_image_res---4hw7wvctsczel1 git-stack-title: Enforce the Responses API 50 MB image limit
This commit is contained in:
@@ -497,7 +497,7 @@ impl ModelClientSession {
|
|||||||
service_tier: Option<ServiceTier>,
|
service_tier: Option<ServiceTier>,
|
||||||
) -> Result<ResponsesApiRequest> {
|
) -> Result<ResponsesApiRequest> {
|
||||||
let instructions = &prompt.base_instructions.text;
|
let instructions = &prompt.base_instructions.text;
|
||||||
let input = prompt.get_formatted_input();
|
let input = prompt.get_formatted_input()?;
|
||||||
let tools = create_tools_json_for_responses_api(&prompt.tools)?;
|
let tools = create_tools_json_for_responses_api(&prompt.tools)?;
|
||||||
let default_reasoning_effort = model_info.default_reasoning_level;
|
let default_reasoning_effort = model_info.default_reasoning_level;
|
||||||
let reasoning = if model_info.supports_reasoning_summaries {
|
let reasoning = if model_info.supports_reasoning_summaries {
|
||||||
|
|||||||
@@ -1,9 +1,14 @@
|
|||||||
use crate::client_common::tools::ToolSpec;
|
use crate::client_common::tools::ToolSpec;
|
||||||
use crate::config::types::Personality;
|
use crate::config::types::Personality;
|
||||||
|
use crate::error::CodexErr;
|
||||||
use crate::error::Result;
|
use crate::error::Result;
|
||||||
|
use base64::Engine as _;
|
||||||
|
use base64::prelude::BASE64_STANDARD;
|
||||||
pub use codex_api::common::ResponseEvent;
|
pub use codex_api::common::ResponseEvent;
|
||||||
use codex_protocol::models::BaseInstructions;
|
use codex_protocol::models::BaseInstructions;
|
||||||
|
use codex_protocol::models::ContentItem;
|
||||||
use codex_protocol::models::FunctionCallOutputBody;
|
use codex_protocol::models::FunctionCallOutputBody;
|
||||||
|
use codex_protocol::models::FunctionCallOutputContentItem;
|
||||||
use codex_protocol::models::ResponseItem;
|
use codex_protocol::models::ResponseItem;
|
||||||
use futures::Stream;
|
use futures::Stream;
|
||||||
use serde::Deserialize;
|
use serde::Deserialize;
|
||||||
@@ -22,6 +27,12 @@ pub const REVIEW_EXIT_SUCCESS_TMPL: &str = include_str!("../templates/review/exi
|
|||||||
pub const REVIEW_EXIT_INTERRUPTED_TMPL: &str =
|
pub const REVIEW_EXIT_INTERRUPTED_TMPL: &str =
|
||||||
include_str!("../templates/review/exit_interrupted.xml");
|
include_str!("../templates/review/exit_interrupted.xml");
|
||||||
|
|
||||||
|
// See the Responses API image input size limits in the Images and Vision guide:
|
||||||
|
// https://platform.openai.com/docs/guides/images-vision?api-mode=responses&format=file
|
||||||
|
const RESPONSES_API_MAX_INLINE_IMAGE_BYTES: usize = 50_000_000;
|
||||||
|
const RESPONSES_API_MAX_INLINE_IMAGE_BYTES_LABEL: &str = "50 MB";
|
||||||
|
const INLINE_TOOL_IMAGE_OMITTED_PLACEHOLDER: &str = "Codex omitted this tool-returned image because the current request would exceed the Responses API 50 MB total image limit. Request fewer images at a time or inspect them in smaller batches.";
|
||||||
|
|
||||||
/// API request payload for a single model turn
|
/// API request payload for a single model turn
|
||||||
#[derive(Default, Debug, Clone)]
|
#[derive(Default, Debug, Clone)]
|
||||||
pub struct Prompt {
|
pub struct Prompt {
|
||||||
@@ -45,7 +56,7 @@ pub struct Prompt {
|
|||||||
}
|
}
|
||||||
|
|
||||||
impl Prompt {
|
impl Prompt {
|
||||||
pub(crate) fn get_formatted_input(&self) -> Vec<ResponseItem> {
|
pub(crate) fn get_formatted_input(&self) -> Result<Vec<ResponseItem>> {
|
||||||
let mut input = self.input.clone();
|
let mut input = self.input.clone();
|
||||||
|
|
||||||
// when using the *Freeform* apply_patch tool specifically, tool outputs
|
// when using the *Freeform* apply_patch tool specifically, tool outputs
|
||||||
@@ -60,7 +71,156 @@ impl Prompt {
|
|||||||
reserialize_shell_outputs(&mut input);
|
reserialize_shell_outputs(&mut input);
|
||||||
}
|
}
|
||||||
|
|
||||||
input
|
enforce_inline_image_request_budget(&mut input, RESPONSES_API_MAX_INLINE_IMAGE_BYTES)?;
|
||||||
|
|
||||||
|
Ok(input)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
fn enforce_inline_image_request_budget(
|
||||||
|
items: &mut [ResponseItem],
|
||||||
|
max_inline_image_bytes: usize,
|
||||||
|
) -> Result<()> {
|
||||||
|
let mut inline_image_bytes = total_inline_image_bytes(items);
|
||||||
|
let mut omitted_model_generated_image = false;
|
||||||
|
|
||||||
|
if inline_image_bytes <= max_inline_image_bytes {
|
||||||
|
return Ok(());
|
||||||
|
}
|
||||||
|
|
||||||
|
for item in items.iter_mut().rev() {
|
||||||
|
if inline_image_bytes <= max_inline_image_bytes {
|
||||||
|
return Ok(());
|
||||||
|
}
|
||||||
|
|
||||||
|
let Some(content_items) = tool_output_content_items_mut(item) else {
|
||||||
|
continue;
|
||||||
|
};
|
||||||
|
|
||||||
|
for content_item in content_items.iter_mut().rev() {
|
||||||
|
if inline_image_bytes <= max_inline_image_bytes {
|
||||||
|
return Ok(());
|
||||||
|
}
|
||||||
|
|
||||||
|
let FunctionCallOutputContentItem::InputImage { image_url, .. } = content_item else {
|
||||||
|
continue;
|
||||||
|
};
|
||||||
|
let Some(image_bytes) = inline_image_data_url_bytes(image_url) else {
|
||||||
|
continue;
|
||||||
|
};
|
||||||
|
|
||||||
|
*content_item = FunctionCallOutputContentItem::InputText {
|
||||||
|
text: INLINE_TOOL_IMAGE_OMITTED_PLACEHOLDER.to_string(),
|
||||||
|
};
|
||||||
|
inline_image_bytes = inline_image_bytes.saturating_sub(image_bytes);
|
||||||
|
omitted_model_generated_image = true;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
Err(CodexErr::InvalidRequest(
|
||||||
|
inline_image_request_budget_exceeded_message(
|
||||||
|
inline_image_bytes,
|
||||||
|
max_inline_image_bytes,
|
||||||
|
omitted_model_generated_image,
|
||||||
|
),
|
||||||
|
))
|
||||||
|
}
|
||||||
|
|
||||||
|
fn total_inline_image_bytes(items: &[ResponseItem]) -> usize {
|
||||||
|
items
|
||||||
|
.iter()
|
||||||
|
.map(response_item_inline_image_bytes)
|
||||||
|
.sum::<usize>()
|
||||||
|
}
|
||||||
|
|
||||||
|
fn response_item_inline_image_bytes(item: &ResponseItem) -> usize {
|
||||||
|
match item {
|
||||||
|
ResponseItem::Message { content, .. } => content
|
||||||
|
.iter()
|
||||||
|
.filter_map(|content_item| match content_item {
|
||||||
|
ContentItem::InputImage { image_url } => inline_image_data_url_bytes(image_url),
|
||||||
|
ContentItem::InputText { .. } | ContentItem::OutputText { .. } => None,
|
||||||
|
})
|
||||||
|
.sum::<usize>(),
|
||||||
|
ResponseItem::FunctionCallOutput { output, .. }
|
||||||
|
| ResponseItem::CustomToolCallOutput { output, .. } => output
|
||||||
|
.content_items()
|
||||||
|
.map(|content_items| {
|
||||||
|
content_items
|
||||||
|
.iter()
|
||||||
|
.filter_map(|content_item| match content_item {
|
||||||
|
FunctionCallOutputContentItem::InputImage { image_url, .. } => {
|
||||||
|
inline_image_data_url_bytes(image_url)
|
||||||
|
}
|
||||||
|
FunctionCallOutputContentItem::InputText { .. } => None,
|
||||||
|
})
|
||||||
|
.sum::<usize>()
|
||||||
|
})
|
||||||
|
.unwrap_or_default(),
|
||||||
|
_ => 0,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
fn tool_output_content_items_mut(
|
||||||
|
item: &mut ResponseItem,
|
||||||
|
) -> Option<&mut Vec<FunctionCallOutputContentItem>> {
|
||||||
|
match item {
|
||||||
|
ResponseItem::FunctionCallOutput { output, .. }
|
||||||
|
| ResponseItem::CustomToolCallOutput { output, .. } => output.content_items_mut(),
|
||||||
|
_ => None,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
fn inline_image_data_url_bytes(url: &str) -> Option<usize> {
|
||||||
|
let payload = parse_base64_image_data_url(url)?;
|
||||||
|
Some(BASE64_STANDARD.decode(payload).ok()?.len())
|
||||||
|
}
|
||||||
|
|
||||||
|
fn parse_base64_image_data_url(url: &str) -> Option<&str> {
|
||||||
|
if !url
|
||||||
|
.get(.."data:".len())
|
||||||
|
.is_some_and(|prefix| prefix.eq_ignore_ascii_case("data:"))
|
||||||
|
{
|
||||||
|
return None;
|
||||||
|
}
|
||||||
|
let comma_index = url.find(',')?;
|
||||||
|
let metadata = &url[..comma_index];
|
||||||
|
let payload = &url[comma_index + 1..];
|
||||||
|
let metadata_without_scheme = &metadata["data:".len()..];
|
||||||
|
let mut metadata_parts = metadata_without_scheme.split(';');
|
||||||
|
let mime_type = metadata_parts.next().unwrap_or_default();
|
||||||
|
let has_base64_marker = metadata_parts.any(|part| part.eq_ignore_ascii_case("base64"));
|
||||||
|
if !mime_type
|
||||||
|
.get(.."image/".len())
|
||||||
|
.is_some_and(|prefix| prefix.eq_ignore_ascii_case("image/"))
|
||||||
|
{
|
||||||
|
return None;
|
||||||
|
}
|
||||||
|
if !has_base64_marker {
|
||||||
|
return None;
|
||||||
|
}
|
||||||
|
Some(payload)
|
||||||
|
}
|
||||||
|
|
||||||
|
fn inline_image_request_budget_exceeded_message(
|
||||||
|
inline_image_bytes: usize,
|
||||||
|
max_inline_image_bytes: usize,
|
||||||
|
omitted_model_generated_image: bool,
|
||||||
|
) -> String {
|
||||||
|
let limit_label = if max_inline_image_bytes == RESPONSES_API_MAX_INLINE_IMAGE_BYTES {
|
||||||
|
RESPONSES_API_MAX_INLINE_IMAGE_BYTES_LABEL.to_string()
|
||||||
|
} else {
|
||||||
|
format!("{max_inline_image_bytes} bytes")
|
||||||
|
};
|
||||||
|
|
||||||
|
if omitted_model_generated_image {
|
||||||
|
format!(
|
||||||
|
"Codex could not send this turn because inline images still total {inline_image_bytes} bytes after omitting all model-generated tool images, exceeding the Responses API {limit_label} total image limit for a single request. Remove some attached images or start a new thread without earlier image attachments."
|
||||||
|
)
|
||||||
|
} else {
|
||||||
|
format!(
|
||||||
|
"Codex could not send this turn because inline images total {inline_image_bytes} bytes, exceeding the Responses API {limit_label} total image limit for a single request. Remove some attached images or start a new thread without earlier image attachments."
|
||||||
|
)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -230,10 +390,14 @@ impl Stream for ResponseStream {
|
|||||||
|
|
||||||
#[cfg(test)]
|
#[cfg(test)]
|
||||||
mod tests {
|
mod tests {
|
||||||
|
use base64::Engine as _;
|
||||||
|
use base64::prelude::BASE64_STANDARD;
|
||||||
use codex_api::ResponsesApiRequest;
|
use codex_api::ResponsesApiRequest;
|
||||||
use codex_api::common::OpenAiVerbosity;
|
use codex_api::common::OpenAiVerbosity;
|
||||||
use codex_api::common::TextControls;
|
use codex_api::common::TextControls;
|
||||||
use codex_api::create_text_param_for_request;
|
use codex_api::create_text_param_for_request;
|
||||||
|
use codex_protocol::models::ContentItem;
|
||||||
|
use codex_protocol::models::FunctionCallOutputContentItem;
|
||||||
use codex_protocol::models::FunctionCallOutputPayload;
|
use codex_protocol::models::FunctionCallOutputPayload;
|
||||||
use pretty_assertions::assert_eq;
|
use pretty_assertions::assert_eq;
|
||||||
|
|
||||||
@@ -399,4 +563,148 @@ mod tests {
|
|||||||
]
|
]
|
||||||
);
|
);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn rewrites_newest_tool_images_until_request_is_within_budget() {
|
||||||
|
let mut items = vec![
|
||||||
|
ResponseItem::Message {
|
||||||
|
id: None,
|
||||||
|
role: "user".to_string(),
|
||||||
|
content: vec![ContentItem::InputImage {
|
||||||
|
image_url: image_data_url(&[1, 2, 3, 4]),
|
||||||
|
}],
|
||||||
|
end_turn: None,
|
||||||
|
phase: None,
|
||||||
|
},
|
||||||
|
ResponseItem::FunctionCallOutput {
|
||||||
|
call_id: "call-1".to_string(),
|
||||||
|
output: FunctionCallOutputPayload::from_content_items(vec![
|
||||||
|
FunctionCallOutputContentItem::InputImage {
|
||||||
|
image_url: image_data_url(&[5, 6, 7, 8]),
|
||||||
|
detail: None,
|
||||||
|
},
|
||||||
|
]),
|
||||||
|
},
|
||||||
|
ResponseItem::CustomToolCallOutput {
|
||||||
|
call_id: "call-2".to_string(),
|
||||||
|
output: FunctionCallOutputPayload::from_content_items(vec![
|
||||||
|
FunctionCallOutputContentItem::InputImage {
|
||||||
|
image_url: image_data_url(&[9, 10, 11, 12]),
|
||||||
|
detail: None,
|
||||||
|
},
|
||||||
|
]),
|
||||||
|
},
|
||||||
|
];
|
||||||
|
|
||||||
|
enforce_inline_image_request_budget(&mut items, 8).expect("request should fit");
|
||||||
|
|
||||||
|
assert_eq!(
|
||||||
|
items,
|
||||||
|
vec![
|
||||||
|
ResponseItem::Message {
|
||||||
|
id: None,
|
||||||
|
role: "user".to_string(),
|
||||||
|
content: vec![ContentItem::InputImage {
|
||||||
|
image_url: image_data_url(&[1, 2, 3, 4]),
|
||||||
|
}],
|
||||||
|
end_turn: None,
|
||||||
|
phase: None,
|
||||||
|
},
|
||||||
|
ResponseItem::FunctionCallOutput {
|
||||||
|
call_id: "call-1".to_string(),
|
||||||
|
output: FunctionCallOutputPayload::from_content_items(vec![
|
||||||
|
FunctionCallOutputContentItem::InputImage {
|
||||||
|
image_url: image_data_url(&[5, 6, 7, 8]),
|
||||||
|
detail: None,
|
||||||
|
},
|
||||||
|
]),
|
||||||
|
},
|
||||||
|
ResponseItem::CustomToolCallOutput {
|
||||||
|
call_id: "call-2".to_string(),
|
||||||
|
output: FunctionCallOutputPayload::from_content_items(vec![
|
||||||
|
FunctionCallOutputContentItem::InputText {
|
||||||
|
text: INLINE_TOOL_IMAGE_OMITTED_PLACEHOLDER.to_string(),
|
||||||
|
},
|
||||||
|
]),
|
||||||
|
},
|
||||||
|
]
|
||||||
|
);
|
||||||
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn errors_when_user_images_still_exceed_request_budget() {
|
||||||
|
let mut items = vec![ResponseItem::Message {
|
||||||
|
id: None,
|
||||||
|
role: "user".to_string(),
|
||||||
|
content: vec![ContentItem::InputImage {
|
||||||
|
image_url: image_data_url(&[1, 2, 3, 4]),
|
||||||
|
}],
|
||||||
|
end_turn: None,
|
||||||
|
phase: None,
|
||||||
|
}];
|
||||||
|
|
||||||
|
let err = enforce_inline_image_request_budget(&mut items, 3).expect_err("should fail");
|
||||||
|
|
||||||
|
assert_eq!(
|
||||||
|
err.to_string(),
|
||||||
|
"Codex could not send this turn because inline images total 4 bytes, exceeding the Responses API 3 bytes total image limit for a single request. Remove some attached images or start a new thread without earlier image attachments."
|
||||||
|
);
|
||||||
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn errors_after_omitting_tool_images_if_user_images_still_exceed_budget() {
|
||||||
|
let mut items = vec![
|
||||||
|
ResponseItem::Message {
|
||||||
|
id: None,
|
||||||
|
role: "user".to_string(),
|
||||||
|
content: vec![ContentItem::InputImage {
|
||||||
|
image_url: image_data_url(&[1, 2, 3, 4]),
|
||||||
|
}],
|
||||||
|
end_turn: None,
|
||||||
|
phase: None,
|
||||||
|
},
|
||||||
|
ResponseItem::FunctionCallOutput {
|
||||||
|
call_id: "call-1".to_string(),
|
||||||
|
output: FunctionCallOutputPayload::from_content_items(vec![
|
||||||
|
FunctionCallOutputContentItem::InputImage {
|
||||||
|
image_url: image_data_url(&[5, 6, 7, 8]),
|
||||||
|
detail: None,
|
||||||
|
},
|
||||||
|
]),
|
||||||
|
},
|
||||||
|
];
|
||||||
|
|
||||||
|
let err = enforce_inline_image_request_budget(&mut items, 3).expect_err("should fail");
|
||||||
|
|
||||||
|
assert_eq!(
|
||||||
|
err.to_string(),
|
||||||
|
"Codex could not send this turn because inline images still total 4 bytes after omitting all model-generated tool images, exceeding the Responses API 3 bytes total image limit for a single request. Remove some attached images or start a new thread without earlier image attachments."
|
||||||
|
);
|
||||||
|
assert_eq!(
|
||||||
|
items,
|
||||||
|
vec![
|
||||||
|
ResponseItem::Message {
|
||||||
|
id: None,
|
||||||
|
role: "user".to_string(),
|
||||||
|
content: vec![ContentItem::InputImage {
|
||||||
|
image_url: image_data_url(&[1, 2, 3, 4]),
|
||||||
|
}],
|
||||||
|
end_turn: None,
|
||||||
|
phase: None,
|
||||||
|
},
|
||||||
|
ResponseItem::FunctionCallOutput {
|
||||||
|
call_id: "call-1".to_string(),
|
||||||
|
output: FunctionCallOutputPayload::from_content_items(vec![
|
||||||
|
FunctionCallOutputContentItem::InputText {
|
||||||
|
text: INLINE_TOOL_IMAGE_OMITTED_PLACEHOLDER.to_string(),
|
||||||
|
},
|
||||||
|
]),
|
||||||
|
},
|
||||||
|
]
|
||||||
|
);
|
||||||
|
}
|
||||||
|
|
||||||
|
fn image_data_url(bytes: &[u8]) -> String {
|
||||||
|
format!("data:image/png;base64,{}", BASE64_STANDARD.encode(bytes))
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
Reference in New Issue
Block a user