Enforce the Responses API 50 MB image limit

git-stack-id: fjord/original_image_res---4hw7wvctsczel1
git-stack-title: Enforce the Responses API 50 MB image limit
This commit is contained in:
Curtis 'Fjord' Hawthorne
2026-02-27 11:40:38 -08:00
parent 52521a5e40
commit 353d49014e
2 changed files with 311 additions and 3 deletions

View File

@@ -497,7 +497,7 @@ impl ModelClientSession {
service_tier: Option<ServiceTier>,
) -> Result<ResponsesApiRequest> {
let instructions = &prompt.base_instructions.text;
let input = prompt.get_formatted_input();
let input = prompt.get_formatted_input()?;
let tools = create_tools_json_for_responses_api(&prompt.tools)?;
let default_reasoning_effort = model_info.default_reasoning_level;
let reasoning = if model_info.supports_reasoning_summaries {

View File

@@ -1,9 +1,14 @@
use crate::client_common::tools::ToolSpec;
use crate::config::types::Personality;
use crate::error::CodexErr;
use crate::error::Result;
use base64::Engine as _;
use base64::prelude::BASE64_STANDARD;
pub use codex_api::common::ResponseEvent;
use codex_protocol::models::BaseInstructions;
use codex_protocol::models::ContentItem;
use codex_protocol::models::FunctionCallOutputBody;
use codex_protocol::models::FunctionCallOutputContentItem;
use codex_protocol::models::ResponseItem;
use futures::Stream;
use serde::Deserialize;
@@ -22,6 +27,12 @@ pub const REVIEW_EXIT_SUCCESS_TMPL: &str = include_str!("../templates/review/exi
pub const REVIEW_EXIT_INTERRUPTED_TMPL: &str =
include_str!("../templates/review/exit_interrupted.xml");
// See the Responses API image input size limits in the Images and Vision guide:
// https://platform.openai.com/docs/guides/images-vision?api-mode=responses&format=file
const RESPONSES_API_MAX_INLINE_IMAGE_BYTES: usize = 50_000_000;
// Human-readable label used in user-facing error messages for the limit above.
const RESPONSES_API_MAX_INLINE_IMAGE_BYTES_LABEL: &str = "50 MB";
// Text substituted in place of a tool-returned inline image that was dropped
// to keep a request under the total image budget (newest images drop first).
const INLINE_TOOL_IMAGE_OMITTED_PLACEHOLDER: &str = "Codex omitted this tool-returned image because the current request would exceed the Responses API 50 MB total image limit. Request fewer images at a time or inspect them in smaller batches.";
/// API request payload for a single model turn
#[derive(Default, Debug, Clone)]
pub struct Prompt {
@@ -45,7 +56,7 @@ pub struct Prompt {
}
impl Prompt {
pub(crate) fn get_formatted_input(&self) -> Vec<ResponseItem> {
pub(crate) fn get_formatted_input(&self) -> Result<Vec<ResponseItem>> {
let mut input = self.input.clone();
// when using the *Freeform* apply_patch tool specifically, tool outputs
@@ -60,7 +71,156 @@ impl Prompt {
reserialize_shell_outputs(&mut input);
}
input
enforce_inline_image_request_budget(&mut input, RESPONSES_API_MAX_INLINE_IMAGE_BYTES)?;
Ok(input)
}
}
/// Ensures the inline (base64 data-URL) images in `items` fit within
/// `max_inline_image_bytes` for a single request.
///
/// Model-generated tool-output images are replaced with a textual
/// placeholder, newest first, until the request fits. User-attached message
/// images are never removed; if the total still exceeds the budget after all
/// tool images have been omitted, an `InvalidRequest` error is returned and
/// any placeholder substitutions already made are left in place.
fn enforce_inline_image_request_budget(
    items: &mut [ResponseItem],
    max_inline_image_bytes: usize,
) -> Result<()> {
    let mut inline_image_bytes = total_inline_image_bytes(items);
    if inline_image_bytes <= max_inline_image_bytes {
        return Ok(());
    }
    let mut omitted_model_generated_image = false;
    // Walk newest-to-oldest so the most recently returned tool images are
    // dropped first, preserving older context the model may still rely on.
    'items: for item in items.iter_mut().rev() {
        let Some(content_items) = tool_output_content_items_mut(item) else {
            continue;
        };
        for content_item in content_items.iter_mut().rev() {
            let FunctionCallOutputContentItem::InputImage { image_url, .. } = content_item else {
                continue;
            };
            let Some(image_bytes) = inline_image_data_url_bytes(image_url) else {
                continue;
            };
            *content_item = FunctionCallOutputContentItem::InputText {
                text: INLINE_TOOL_IMAGE_OMITTED_PLACEHOLDER.to_string(),
            };
            inline_image_bytes = inline_image_bytes.saturating_sub(image_bytes);
            omitted_model_generated_image = true;
            if inline_image_bytes <= max_inline_image_bytes {
                break 'items;
            }
        }
    }
    // Fix: the original only checked the budget at the top of each loop
    // iteration, so when the final omission happened on the last iterated
    // item/content entry the loops exited and the function erroneously
    // returned an error even though the request now fit. Re-check here.
    if inline_image_bytes <= max_inline_image_bytes {
        return Ok(());
    }
    Err(CodexErr::InvalidRequest(
        inline_image_request_budget_exceeded_message(
            inline_image_bytes,
            max_inline_image_bytes,
            omitted_model_generated_image,
        ),
    ))
}
fn total_inline_image_bytes(items: &[ResponseItem]) -> usize {
items
.iter()
.map(response_item_inline_image_bytes)
.sum::<usize>()
}
/// Decoded byte size of the inline (base64 data-URL) images carried by one
/// response item; all non-image content contributes zero.
fn response_item_inline_image_bytes(item: &ResponseItem) -> usize {
    match item {
        // User/assistant messages may carry inline input images directly.
        ResponseItem::Message { content, .. } => {
            let mut total = 0usize;
            for entry in content {
                if let ContentItem::InputImage { image_url } = entry {
                    total += inline_image_data_url_bytes(image_url).unwrap_or(0);
                }
            }
            total
        }
        // Tool-call outputs may carry inline images in structured content.
        ResponseItem::FunctionCallOutput { output, .. }
        | ResponseItem::CustomToolCallOutput { output, .. } => {
            output.content_items().map_or(0, |entries| {
                entries
                    .iter()
                    .filter_map(|entry| match entry {
                        FunctionCallOutputContentItem::InputImage { image_url, .. } => {
                            inline_image_data_url_bytes(image_url)
                        }
                        FunctionCallOutputContentItem::InputText { .. } => None,
                    })
                    .sum::<usize>()
            })
        }
        // Every other item kind carries no inline images.
        _ => 0,
    }
}
/// Borrows the mutable structured content of a tool-call output item, or
/// `None` for item kinds that have no tool-output payload.
fn tool_output_content_items_mut(
    item: &mut ResponseItem,
) -> Option<&mut Vec<FunctionCallOutputContentItem>> {
    if let ResponseItem::FunctionCallOutput { output, .. }
    | ResponseItem::CustomToolCallOutput { output, .. } = item
    {
        output.content_items_mut()
    } else {
        None
    }
}
/// Decoded byte length of a base64 `data:image/...` URL, or `None` when the
/// URL is not a valid inline base64 image (remote URL, other mime type, or
/// malformed base64 payload).
fn inline_image_data_url_bytes(url: &str) -> Option<usize> {
    let encoded = parse_base64_image_data_url(url)?;
    let decoded = BASE64_STANDARD.decode(encoded).ok()?;
    Some(decoded.len())
}
/// Extracts the base64 payload of an image data URL.
///
/// Returns `Some(payload)` only when the URL has a case-insensitive `data:`
/// scheme, an `image/*` mime type, and a `base64` attribute in its metadata
/// (per RFC 2397: `data:<mime>[;attr]*;base64,<payload>`). Anything else —
/// remote URLs, non-image mime types, or non-base64 encodings — yields `None`.
fn parse_base64_image_data_url(url: &str) -> Option<&str> {
    let scheme = "data:";
    // `get` (rather than indexing) also rejects URLs shorter than the scheme.
    let leading = url.get(..scheme.len())?;
    if !leading.eq_ignore_ascii_case(scheme) {
        return None;
    }
    // RFC 2397 reserves the first comma as the metadata/payload separator.
    let (metadata, payload) = url.split_once(',')?;
    let attributes = &metadata[scheme.len()..];
    let mut parts = attributes.split(';');
    let mime_type = parts.next().unwrap_or_default();
    let is_image = mime_type
        .get(.."image/".len())
        .is_some_and(|prefix| prefix.eq_ignore_ascii_case("image/"));
    let is_base64 = parts.any(|part| part.eq_ignore_ascii_case("base64"));
    (is_image && is_base64).then_some(payload)
}
fn inline_image_request_budget_exceeded_message(
inline_image_bytes: usize,
max_inline_image_bytes: usize,
omitted_model_generated_image: bool,
) -> String {
let limit_label = if max_inline_image_bytes == RESPONSES_API_MAX_INLINE_IMAGE_BYTES {
RESPONSES_API_MAX_INLINE_IMAGE_BYTES_LABEL.to_string()
} else {
format!("{max_inline_image_bytes} bytes")
};
if omitted_model_generated_image {
format!(
"Codex could not send this turn because inline images still total {inline_image_bytes} bytes after omitting all model-generated tool images, exceeding the Responses API {limit_label} total image limit for a single request. Remove some attached images or start a new thread without earlier image attachments."
)
} else {
format!(
"Codex could not send this turn because inline images total {inline_image_bytes} bytes, exceeding the Responses API {limit_label} total image limit for a single request. Remove some attached images or start a new thread without earlier image attachments."
)
}
}
@@ -230,10 +390,14 @@ impl Stream for ResponseStream {
#[cfg(test)]
mod tests {
use base64::Engine as _;
use base64::prelude::BASE64_STANDARD;
use codex_api::ResponsesApiRequest;
use codex_api::common::OpenAiVerbosity;
use codex_api::common::TextControls;
use codex_api::create_text_param_for_request;
use codex_protocol::models::ContentItem;
use codex_protocol::models::FunctionCallOutputContentItem;
use codex_protocol::models::FunctionCallOutputPayload;
use pretty_assertions::assert_eq;
@@ -399,4 +563,148 @@ mod tests {
]
);
}
#[test]
// Three items carry 4 decoded bytes of inline image each (12 total) against
// an 8-byte budget. Only the NEWEST tool-output image (the trailing
// CustomToolCallOutput) should be replaced with the placeholder text; the
// user message image and the older FunctionCallOutput image must survive.
fn rewrites_newest_tool_images_until_request_is_within_budget() {
let mut items = vec![
ResponseItem::Message {
id: None,
role: "user".to_string(),
content: vec![ContentItem::InputImage {
image_url: image_data_url(&[1, 2, 3, 4]),
}],
end_turn: None,
phase: None,
},
ResponseItem::FunctionCallOutput {
call_id: "call-1".to_string(),
output: FunctionCallOutputPayload::from_content_items(vec![
FunctionCallOutputContentItem::InputImage {
image_url: image_data_url(&[5, 6, 7, 8]),
detail: None,
},
]),
},
ResponseItem::CustomToolCallOutput {
call_id: "call-2".to_string(),
output: FunctionCallOutputPayload::from_content_items(vec![
FunctionCallOutputContentItem::InputImage {
image_url: image_data_url(&[9, 10, 11, 12]),
detail: None,
},
]),
},
];
enforce_inline_image_request_budget(&mut items, 8).expect("request should fit");
assert_eq!(
items,
vec![
ResponseItem::Message {
id: None,
role: "user".to_string(),
content: vec![ContentItem::InputImage {
image_url: image_data_url(&[1, 2, 3, 4]),
}],
end_turn: None,
phase: None,
},
ResponseItem::FunctionCallOutput {
call_id: "call-1".to_string(),
output: FunctionCallOutputPayload::from_content_items(vec![
FunctionCallOutputContentItem::InputImage {
image_url: image_data_url(&[5, 6, 7, 8]),
detail: None,
},
]),
},
// The newest tool image was swapped for the placeholder text.
ResponseItem::CustomToolCallOutput {
call_id: "call-2".to_string(),
output: FunctionCallOutputPayload::from_content_items(vec![
FunctionCallOutputContentItem::InputText {
text: INLINE_TOOL_IMAGE_OMITTED_PLACEHOLDER.to_string(),
},
]),
},
]
);
}
#[test]
// User-attached message images are never dropped: when they alone exceed the
// budget (4 bytes vs a 3-byte limit) the turn fails with the error variant
// that does NOT mention omitted tool images.
fn errors_when_user_images_still_exceed_request_budget() {
let mut items = vec![ResponseItem::Message {
id: None,
role: "user".to_string(),
content: vec![ContentItem::InputImage {
image_url: image_data_url(&[1, 2, 3, 4]),
}],
end_turn: None,
phase: None,
}];
let err = enforce_inline_image_request_budget(&mut items, 3).expect_err("should fail");
assert_eq!(
err.to_string(),
"Codex could not send this turn because inline images total 4 bytes, exceeding the Responses API 3 bytes total image limit for a single request. Remove some attached images or start a new thread without earlier image attachments."
);
}
#[test]
// Even after every tool image has been replaced with the placeholder, the
// remaining user-attached image (4 bytes) still exceeds the 3-byte budget,
// so the turn fails with the "still total ... after omitting" error variant.
// The placeholder substitution is left in place on the tool output.
fn errors_after_omitting_tool_images_if_user_images_still_exceed_budget() {
let mut items = vec![
ResponseItem::Message {
id: None,
role: "user".to_string(),
content: vec![ContentItem::InputImage {
image_url: image_data_url(&[1, 2, 3, 4]),
}],
end_turn: None,
phase: None,
},
ResponseItem::FunctionCallOutput {
call_id: "call-1".to_string(),
output: FunctionCallOutputPayload::from_content_items(vec![
FunctionCallOutputContentItem::InputImage {
image_url: image_data_url(&[5, 6, 7, 8]),
detail: None,
},
]),
},
];
let err = enforce_inline_image_request_budget(&mut items, 3).expect_err("should fail");
assert_eq!(
err.to_string(),
"Codex could not send this turn because inline images still total 4 bytes after omitting all model-generated tool images, exceeding the Responses API 3 bytes total image limit for a single request. Remove some attached images or start a new thread without earlier image attachments."
);
assert_eq!(
items,
vec![
ResponseItem::Message {
id: None,
role: "user".to_string(),
content: vec![ContentItem::InputImage {
image_url: image_data_url(&[1, 2, 3, 4]),
}],
end_turn: None,
phase: None,
},
// The tool image was still replaced, even though the turn failed.
ResponseItem::FunctionCallOutput {
call_id: "call-1".to_string(),
output: FunctionCallOutputPayload::from_content_items(vec![
FunctionCallOutputContentItem::InputText {
text: INLINE_TOOL_IMAGE_OMITTED_PLACEHOLDER.to_string(),
},
]),
},
]
);
}
/// Builds a base64 PNG data URL from raw bytes for use as a test fixture.
fn image_data_url(bytes: &[u8]) -> String {
    let encoded = BASE64_STANDARD.encode(bytes);
    format!("data:image/png;base64,{encoded}")
}
}