feat(app-server, core): allow text + image content items for dynamic tool outputs (#10567)

Took over the work that @aaronl-openai started here: https://github.com/openai/codex/pull/10397 Now that app-server clients are able to set up custom tools (called `dynamic_tools` in app-server), we should expose a way for clients to pass in not just text, but also image outputs. This is something the Responses API already supports for function call outputs, where you can pass in either a string or an array of content outputs (text, image, file): https://platform.openai.com/docs/api-reference/responses/create#responses_create-input-input_item_list-item-function_tool_call_output-output-array-input_image So let's just plumb it through in Codex (with the caveat that we only support text and image for now). This is implemented end-to-end across app-server v2 protocol types and core tool handling. ## Breaking API change NOTE: This introduces a breaking change with dynamic tools, but I think it's ok since this concept was only recently introduced (https://github.com/openai/codex/pull/9539) and it's better to get the API contract correct. I don't think there are any real consumers of this yet (not even the Codex App). Old shape: `{ "output": "dynamic-ok", "success": true }` New shape: ``` { "contentItems": [ { "type": "inputText", "text": "dynamic-ok" }, { "type": "inputImage", "imageUrl": "data:image/png;base64,AAA" } ], "success": true } ```
2026-05-03 21:01:55 +03:00 · 2026-02-04 16:12:47 -08:00
parent f9c38f531c
commit 5ea107a088
49 changed files with 1103 additions and 468 deletions
--- a/codex-rs/core/src/context_manager/history.rs
+++ b/codex-rs/core/src/context_manager/history.rs
@@ -10,6 +10,7 @@ use crate::truncate::truncate_function_output_items_with_policy;
 use crate::truncate::truncate_text;
 use crate::user_shell_command::is_user_shell_command_text;
 use codex_protocol::models::ContentItem;
+use codex_protocol::models::FunctionCallOutputBody;
 use codex_protocol::models::FunctionCallOutputContentItem;
 use codex_protocol::models::FunctionCallOutputPayload;
 use codex_protocol::models::ResponseItem;
@@ -134,7 +135,7 @@ impl ContextManager {

        match &mut self.items[index] {
            ResponseItem::FunctionCallOutput { output, .. } => {
-                let Some(content_items) = output.content_items.as_mut() else {
+                let Some(content_items) = output.content_items_mut() else {
                    return false;
                };
                let mut replaced = false;
@@ -268,19 +269,23 @@ impl ContextManager {
        let policy_with_serialization_budget = policy * 1.2;
        match item {
            ResponseItem::FunctionCallOutput { call_id, output } => {
-                let truncated =
-                    truncate_text(output.content.as_str(), policy_with_serialization_budget);
-                let truncated_items = output.content_items.as_ref().map(|items| {
-                    truncate_function_output_items_with_policy(
-                        items,
-                        policy_with_serialization_budget,
-                    )
-                });
+                let body = match &output.body {
+                    FunctionCallOutputBody::Text(content) => FunctionCallOutputBody::Text(
+                        truncate_text(content, policy_with_serialization_budget),
+                    ),
+                    FunctionCallOutputBody::ContentItems(items) => {
+                        FunctionCallOutputBody::ContentItems(
+                            truncate_function_output_items_with_policy(
+                                items,
+                                policy_with_serialization_budget,
+                            ),
+                        )
+                    }
+                };
                ResponseItem::FunctionCallOutput {
                    call_id: call_id.clone(),
                    output: FunctionCallOutputPayload {
-                        content: truncated,
-                        content_items: truncated_items,
+                        body,
                        success: output.success,
                    },
                }
--- a/codex-rs/core/src/context_manager/history_tests.rs
+++ b/codex-rs/core/src/context_manager/history_tests.rs
@@ -3,6 +3,7 @@ use crate::truncate;
 use crate::truncate::TruncationPolicy;
 use codex_git::GhostCommit;
 use codex_protocol::models::ContentItem;
+use codex_protocol::models::FunctionCallOutputBody;
 use codex_protocol::models::FunctionCallOutputContentItem;
 use codex_protocol::models::FunctionCallOutputPayload;
 use codex_protocol::models::LocalShellAction;
@@ -63,10 +64,7 @@ fn user_input_text_msg(text: &str) -> ResponseItem {
 fn function_call_output(call_id: &str, content: &str) -> ResponseItem {
    ResponseItem::FunctionCallOutput {
        call_id: call_id.to_string(),
-        output: FunctionCallOutputPayload {
-            content: content.to_string(),
-            ..Default::default()
-        },
+        output: FunctionCallOutputPayload::from_text(content.to_string()),
    }
 }

@@ -263,10 +261,7 @@ fn remove_first_item_removes_matching_output_for_function_call() {
        },
        ResponseItem::FunctionCallOutput {
            call_id: "call-1".to_string(),
-            output: FunctionCallOutputPayload {
-                content: "ok".to_string(),
-                ..Default::default()
-            },
+            output: FunctionCallOutputPayload::from_text("ok".to_string()),
        },
    ];
    let mut h = create_history_with_items(items);
@@ -279,10 +274,7 @@ fn remove_first_item_removes_matching_call_for_output() {
    let items = vec![
        ResponseItem::FunctionCallOutput {
            call_id: "call-2".to_string(),
-            output: FunctionCallOutputPayload {
-                content: "ok".to_string(),
-                ..Default::default()
-            },
+            output: FunctionCallOutputPayload::from_text("ok".to_string()),
        },
        ResponseItem::FunctionCall {
            id: None,
@@ -308,10 +300,7 @@ fn remove_last_item_removes_matching_call_for_output() {
        },
        ResponseItem::FunctionCallOutput {
            call_id: "call-delete-last".to_string(),
-            output: FunctionCallOutputPayload {
-                content: "ok".to_string(),
-                ..Default::default()
-            },
+            output: FunctionCallOutputPayload::from_text("ok".to_string()),
        },
    ];
    let mut h = create_history_with_items(items);
@@ -327,10 +316,11 @@ fn replace_last_turn_images_replaces_tool_output_images() {
        ResponseItem::FunctionCallOutput {
            call_id: "call-1".to_string(),
            output: FunctionCallOutputPayload {
-                content: "ok".to_string(),
-                content_items: Some(vec![FunctionCallOutputContentItem::InputImage {
-                    image_url: "data:image/png;base64,AAA".to_string(),
-                }]),
+                body: FunctionCallOutputBody::ContentItems(vec![
+                    FunctionCallOutputContentItem::InputImage {
+                        image_url: "data:image/png;base64,AAA".to_string(),
+                    },
+                ]),
                success: Some(true),
            },
        },
@@ -346,10 +336,11 @@ fn replace_last_turn_images_replaces_tool_output_images() {
            ResponseItem::FunctionCallOutput {
                call_id: "call-1".to_string(),
                output: FunctionCallOutputPayload {
-                    content: "ok".to_string(),
-                    content_items: Some(vec![FunctionCallOutputContentItem::InputText {
-                        text: "Invalid image".to_string(),
-                    }]),
+                    body: FunctionCallOutputBody::ContentItems(vec![
+                        FunctionCallOutputContentItem::InputText {
+                            text: "Invalid image".to_string(),
+                        },
+                    ]),
                    success: Some(true),
                },
            },
@@ -391,10 +382,7 @@ fn remove_first_item_handles_local_shell_pair() {
        },
        ResponseItem::FunctionCallOutput {
            call_id: "call-3".to_string(),
-            output: FunctionCallOutputPayload {
-                content: "ok".to_string(),
-                ..Default::default()
-            },
+            output: FunctionCallOutputPayload::from_text("ok".to_string()),
        },
    ];
    let mut h = create_history_with_items(items);
@@ -560,10 +548,7 @@ fn normalization_retains_local_shell_outputs() {
        },
        ResponseItem::FunctionCallOutput {
            call_id: "shell-1".to_string(),
-            output: FunctionCallOutputPayload {
-                content: "Total output lines: 1\n\nok".to_string(),
-                ..Default::default()
-            },
+            output: FunctionCallOutputPayload::from_text("Total output lines: 1\n\nok".to_string()),
        },
    ];

@@ -583,9 +568,8 @@ fn record_items_truncates_function_call_output_content() {
    let item = ResponseItem::FunctionCallOutput {
        call_id: "call-100".to_string(),
        output: FunctionCallOutputPayload {
-            content: long_output.clone(),
+            body: FunctionCallOutputBody::Text(long_output.clone()),
            success: Some(true),
-            ..Default::default()
        },
    };

@@ -594,16 +578,15 @@ fn record_items_truncates_function_call_output_content() {
    assert_eq!(history.items.len(), 1);
    match &history.items[0] {
        ResponseItem::FunctionCallOutput { output, .. } => {
-            assert_ne!(output.content, long_output);
+            let content = output.text_content().unwrap_or_default();
+            assert_ne!(content, long_output);
            assert!(
-                output.content.contains("tokens truncated"),
-                "expected token-based truncation marker, got {}",
-                output.content
+                content.contains("tokens truncated"),
+                "expected token-based truncation marker, got {content}"
            );
            assert!(
-                output.content.contains("tokens truncated"),
-                "expected truncation marker, got {}",
-                output.content
+                content.contains("tokens truncated"),
+                "expected truncation marker, got {content}"
            );
        }
        other => panic!("unexpected history item: {other:?}"),
@@ -648,9 +631,8 @@ fn record_items_respects_custom_token_limit() {
    let item = ResponseItem::FunctionCallOutput {
        call_id: "call-custom-limit".to_string(),
        output: FunctionCallOutputPayload {
-            content: long_output,
+            body: FunctionCallOutputBody::Text(long_output),
            success: Some(true),
-            ..Default::default()
        },
    };

@@ -660,7 +642,11 @@ fn record_items_respects_custom_token_limit() {
        ResponseItem::FunctionCallOutput { output, .. } => output,
        other => panic!("unexpected history item: {other:?}"),
    };
-    assert!(stored.content.contains("tokens truncated"));
+    assert!(
+        stored
+            .text_content()
+            .is_some_and(|content| content.contains("tokens truncated"))
+    );
 }

 fn assert_truncated_message_matches(message: &str, line: &str, expected_removed: usize) {
@@ -782,10 +768,7 @@ fn normalize_adds_missing_output_for_function_call() {
            },
            ResponseItem::FunctionCallOutput {
                call_id: "call-x".to_string(),
-                output: FunctionCallOutputPayload {
-                    content: "aborted".to_string(),
-                    ..Default::default()
-                },
+                output: FunctionCallOutputPayload::from_text("aborted".to_string()),
            },
        ]
    );
@@ -859,10 +842,7 @@ fn normalize_adds_missing_output_for_local_shell_call_with_id() {
            },
            ResponseItem::FunctionCallOutput {
                call_id: "shell-1".to_string(),
-                output: FunctionCallOutputPayload {
-                    content: "aborted".to_string(),
-                    ..Default::default()
-                },
+                output: FunctionCallOutputPayload::from_text("aborted".to_string()),
            },
        ]
    );
@@ -873,10 +853,7 @@ fn normalize_adds_missing_output_for_local_shell_call_with_id() {
 fn normalize_removes_orphan_function_call_output() {
    let items = vec![ResponseItem::FunctionCallOutput {
        call_id: "orphan-1".to_string(),
-        output: FunctionCallOutputPayload {
-            content: "ok".to_string(),
-            ..Default::default()
-        },
+        output: FunctionCallOutputPayload::from_text("ok".to_string()),
    }];
    let mut h = create_history_with_items(items);

@@ -913,10 +890,7 @@ fn normalize_mixed_inserts_and_removals() {
        // Orphan output that should be removed
        ResponseItem::FunctionCallOutput {
            call_id: "c2".to_string(),
-            output: FunctionCallOutputPayload {
-                content: "ok".to_string(),
-                ..Default::default()
-            },
+            output: FunctionCallOutputPayload::from_text("ok".to_string()),
        },
        // Will get an inserted custom tool output
        ResponseItem::CustomToolCall {
@@ -955,10 +929,7 @@ fn normalize_mixed_inserts_and_removals() {
            },
            ResponseItem::FunctionCallOutput {
                call_id: "c1".to_string(),
-                output: FunctionCallOutputPayload {
-                    content: "aborted".to_string(),
-                    ..Default::default()
-                },
+                output: FunctionCallOutputPayload::from_text("aborted".to_string()),
            },
            ResponseItem::CustomToolCall {
                id: None,
@@ -985,10 +956,7 @@ fn normalize_mixed_inserts_and_removals() {
            },
            ResponseItem::FunctionCallOutput {
                call_id: "s1".to_string(),
-                output: FunctionCallOutputPayload {
-                    content: "aborted".to_string(),
-                    ..Default::default()
-                },
+                output: FunctionCallOutputPayload::from_text("aborted".to_string()),
            },
        ]
    );
@@ -1015,10 +983,7 @@ fn normalize_adds_missing_output_for_function_call_inserts_output() {
            },
            ResponseItem::FunctionCallOutput {
                call_id: "call-x".to_string(),
-                output: FunctionCallOutputPayload {
-                    content: "aborted".to_string(),
-                    ..Default::default()
-                },
+                output: FunctionCallOutputPayload::from_text("aborted".to_string()),
            },
        ]
    );
@@ -1065,10 +1030,7 @@ fn normalize_adds_missing_output_for_local_shell_call_with_id_panics_in_debug()
 fn normalize_removes_orphan_function_call_output_panics_in_debug() {
    let items = vec![ResponseItem::FunctionCallOutput {
        call_id: "orphan-1".to_string(),
-        output: FunctionCallOutputPayload {
-            content: "ok".to_string(),
-            ..Default::default()
-        },
+        output: FunctionCallOutputPayload::from_text("ok".to_string()),
    }];
    let mut h = create_history_with_items(items);
    h.normalize_history();
@@ -1099,10 +1061,7 @@ fn normalize_mixed_inserts_and_removals_panics_in_debug() {
        },
        ResponseItem::FunctionCallOutput {
            call_id: "c2".to_string(),
-            output: FunctionCallOutputPayload {
-                content: "ok".to_string(),
-                ..Default::default()
-            },
+            output: FunctionCallOutputPayload::from_text("ok".to_string()),
        },
        ResponseItem::CustomToolCall {
            id: None,
--- a/codex-rs/core/src/context_manager/normalize.rs
+++ b/codex-rs/core/src/context_manager/normalize.rs
@@ -1,5 +1,6 @@
 use std::collections::HashSet;

+use codex_protocol::models::FunctionCallOutputBody;
 use codex_protocol::models::FunctionCallOutputPayload;
 use codex_protocol::models::ResponseItem;

@@ -29,7 +30,7 @@ pub(crate) fn ensure_call_outputs_present(items: &mut Vec<ResponseItem>) {
                        ResponseItem::FunctionCallOutput {
                            call_id: call_id.clone(),
                            output: FunctionCallOutputPayload {
-                                content: "aborted".to_string(),
+                                body: FunctionCallOutputBody::Text("aborted".to_string()),
                                ..Default::default()
                            },
                        },
@@ -76,7 +77,7 @@ pub(crate) fn ensure_call_outputs_present(items: &mut Vec<ResponseItem>) {
                            ResponseItem::FunctionCallOutput {
                                call_id: call_id.clone(),
                                output: FunctionCallOutputPayload {
-                                    content: "aborted".to_string(),
+                                    body: FunctionCallOutputBody::Text("aborted".to_string()),
                                    ..Default::default()
                                },
                            },