mirror of
https://github.com/openai/codex.git
synced 2026-05-02 04:11:39 +03:00
Fix: Render MCP image outputs regardless of ordering (#9815)
## What?
- Render an MCP image output cell whenever a decodable image block
exists in `CallToolResult.content` (including text-before-image or
malformed image before valid image).
## Why?
- Tool results that include caption text before the image currently drop
the image output cell.
- A malformed image block can also suppress later valid image output.
## How?
- Iterate `content` and return the first successfully decoded image
instead of only checking the first block.
- Add unit tests that cover text-before-image ordering and
invalid-image-before-valid.
## Before
```rust
let image = match result {
Ok(mcp_types::CallToolResult { content, .. }) => {
if let Some(mcp_types::ContentBlock::ImageContent(image)) = content.first() {
// decode image (fails -> None)
} else {
None
}
}
_ => None,
}?;
```
## After
```rust
let image = result
.as_ref()
.ok()?
.content
.iter()
.find_map(decode_mcp_image)?;
```
## Risk / Impact
- Low: only affects image cell creation for MCP tool results; no change
for non-image outputs.
## Tests
- [x] `just fmt`
- [x] `cargo test -p codex-tui`
- [x] Rerun after branch update (2026-01-27): `just fmt`, `cargo test -p
codex-tui`
Manual testing
# Manual testing: MCP image tool result rendering (Codex TUI)
# Build the rmcp stdio test server binary:
cd codex-rs
cargo build -p codex-rmcp-client --bin test_stdio_server
# Register the server as an MCP server (absolute path to the built binary):
codex mcp add mcpimg -- /Users/joshka/code/codex-pr-review/codex-rs/target/debug/test_stdio_server
# Then in Codex TUI, ask it to call:
- mcpimg.image_scenario({"scenario":"image_only"})
- mcpimg.image_scenario({"scenario":"text_then_image","caption":"Here is the image:"})
- mcpimg.image_scenario({"scenario":"invalid_base64_then_image"})
- mcpimg.image_scenario({"scenario":"invalid_image_bytes_then_image"})
- mcpimg.image_scenario({"scenario":"multiple_valid_images"})
- mcpimg.image_scenario({"scenario":"image_then_text","caption":"Here is the image:"})
- mcpimg.image_scenario({"scenario":"text_only","caption":"Here is the image:"})
# Expected:
# - You should see an extra history cell: "tool result (image output)" when the
# tool result contains at least one decodable image block (even if earlier
# blocks are text or invalid images).
Fixes #9814
---------
Co-authored-by: Josh McKinney <joshka@openai.com>
This commit is contained in:
@@ -1429,44 +1429,62 @@ pub(crate) fn new_web_search_call(
|
||||
cell
|
||||
}
|
||||
|
||||
/// If the first content is an image, return a new cell with the image.
|
||||
/// TODO(rgwood-dd): Handle images properly even if they're not the first result.
|
||||
/// Returns an additional history cell if an MCP tool result includes a decodable image.
|
||||
///
|
||||
/// This intentionally returns at most one cell: the first image in `CallToolResult.content` that
|
||||
/// successfully base64-decodes and parses as an image. This is used as a lightweight “image output
|
||||
/// exists” affordance separate from the main MCP tool call cell.
|
||||
///
|
||||
/// Manual testing tip:
|
||||
/// - Run the rmcp stdio test server (`codex-rs/rmcp-client/src/bin/test_stdio_server.rs`) and
|
||||
/// register it as an MCP server via `codex mcp add`.
|
||||
/// - Use its `image_scenario` tool with cases like `text_then_image`,
|
||||
/// `invalid_base64_then_image`, or `invalid_image_bytes_then_image` to ensure this path triggers
|
||||
/// even when the first block is not a valid image.
|
||||
fn try_new_completed_mcp_tool_call_with_image_output(
|
||||
result: &Result<mcp_types::CallToolResult, String>,
|
||||
) -> Option<CompletedMcpToolCallWithImageOutput> {
|
||||
match result {
|
||||
Ok(mcp_types::CallToolResult { content, .. }) => {
|
||||
if let Some(mcp_types::ContentBlock::ImageContent(image)) = content.first() {
|
||||
let raw_data = match base64::engine::general_purpose::STANDARD.decode(&image.data) {
|
||||
Ok(data) => data,
|
||||
Err(e) => {
|
||||
error!("Failed to decode image data: {e}");
|
||||
return None;
|
||||
}
|
||||
};
|
||||
let reader = match ImageReader::new(Cursor::new(raw_data)).with_guessed_format() {
|
||||
Ok(reader) => reader,
|
||||
Err(e) => {
|
||||
error!("Failed to guess image format: {e}");
|
||||
return None;
|
||||
}
|
||||
};
|
||||
let image = result
|
||||
.as_ref()
|
||||
.ok()?
|
||||
.content
|
||||
.iter()
|
||||
.find_map(decode_mcp_image)?;
|
||||
|
||||
let image = match reader.decode() {
|
||||
Ok(image) => image,
|
||||
Err(e) => {
|
||||
error!("Image decoding failed: {e}");
|
||||
return None;
|
||||
}
|
||||
};
|
||||
Some(CompletedMcpToolCallWithImageOutput { _image: image })
|
||||
}
|
||||
|
||||
Some(CompletedMcpToolCallWithImageOutput { _image: image })
|
||||
} else {
|
||||
None
|
||||
}
|
||||
}
|
||||
_ => None,
|
||||
}
|
||||
/// Decodes an MCP `ImageContent` block into an in-memory image.
|
||||
///
|
||||
/// Returns `None` when the block is not an image, when base64 decoding fails, when the format
|
||||
/// cannot be inferred, or when the image decoder rejects the bytes.
|
||||
fn decode_mcp_image(block: &mcp_types::ContentBlock) -> Option<DynamicImage> {
|
||||
let image = match block {
|
||||
mcp_types::ContentBlock::ImageContent(image) => image,
|
||||
_ => return None,
|
||||
};
|
||||
let raw_data = base64::engine::general_purpose::STANDARD
|
||||
.decode(&image.data)
|
||||
.map_err(|e| {
|
||||
error!("Failed to decode image data: {e}");
|
||||
e
|
||||
})
|
||||
.ok()?;
|
||||
let reader = ImageReader::new(Cursor::new(raw_data))
|
||||
.with_guessed_format()
|
||||
.map_err(|e| {
|
||||
error!("Failed to guess image format: {e}");
|
||||
e
|
||||
})
|
||||
.ok()?;
|
||||
|
||||
reader
|
||||
.decode()
|
||||
.map_err(|e| {
|
||||
error!("Image decoding failed: {e}");
|
||||
e
|
||||
})
|
||||
.ok()
|
||||
}
|
||||
|
||||
#[allow(clippy::disallowed_methods)]
|
||||
@@ -1929,9 +1947,12 @@ mod tests {
|
||||
use codex_core::protocol::ExecCommandSource;
|
||||
use mcp_types::CallToolResult;
|
||||
use mcp_types::ContentBlock;
|
||||
use mcp_types::ImageContent;
|
||||
use mcp_types::TextContent;
|
||||
use mcp_types::Tool;
|
||||
use mcp_types::ToolInputSchema;
|
||||
|
||||
const SMALL_PNG_BASE64: &str = "iVBORw0KGgoAAAANSUhEUgAAAAEAAAABCAYAAAAfFcSJAAAADUlEQVR4nGP4z8DwHwAFAAH/iZk9HQAAAABJRU5ErkJggg==";
|
||||
async fn test_config() -> Config {
|
||||
let codex_home = std::env::temp_dir();
|
||||
ConfigBuilder::default()
|
||||
@@ -1957,6 +1978,15 @@ mod tests {
|
||||
render_lines(&cell.transcript_lines(u16::MAX))
|
||||
}
|
||||
|
||||
fn image_block(data: &str) -> ContentBlock {
|
||||
ContentBlock::ImageContent(ImageContent {
|
||||
annotations: None,
|
||||
data: data.to_string(),
|
||||
mime_type: "image/png".into(),
|
||||
r#type: "image".into(),
|
||||
})
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn unified_exec_interaction_cell_renders_input() {
|
||||
let cell =
|
||||
@@ -2251,6 +2281,63 @@ mod tests {
|
||||
insta::assert_snapshot!(rendered);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn completed_mcp_tool_call_image_after_text_returns_extra_cell() {
|
||||
let invocation = McpInvocation {
|
||||
server: "image".into(),
|
||||
tool: "generate".into(),
|
||||
arguments: Some(json!({
|
||||
"prompt": "tiny image",
|
||||
})),
|
||||
};
|
||||
|
||||
let result = CallToolResult {
|
||||
content: vec![
|
||||
ContentBlock::TextContent(TextContent {
|
||||
annotations: None,
|
||||
text: "Here is the image:".into(),
|
||||
r#type: "text".into(),
|
||||
}),
|
||||
image_block(SMALL_PNG_BASE64),
|
||||
],
|
||||
is_error: None,
|
||||
structured_content: None,
|
||||
};
|
||||
|
||||
let mut cell = new_active_mcp_tool_call("call-image".into(), invocation, true);
|
||||
let extra_cell = cell
|
||||
.complete(Duration::from_millis(25), Ok(result))
|
||||
.expect("expected image cell");
|
||||
|
||||
let rendered = render_lines(&extra_cell.display_lines(80));
|
||||
assert_eq!(rendered, vec!["tool result (image output)"]);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn completed_mcp_tool_call_skips_invalid_image_blocks() {
|
||||
let invocation = McpInvocation {
|
||||
server: "image".into(),
|
||||
tool: "generate".into(),
|
||||
arguments: Some(json!({
|
||||
"prompt": "tiny image",
|
||||
})),
|
||||
};
|
||||
|
||||
let result = CallToolResult {
|
||||
content: vec![image_block("not-base64"), image_block(SMALL_PNG_BASE64)],
|
||||
is_error: None,
|
||||
structured_content: None,
|
||||
};
|
||||
|
||||
let mut cell = new_active_mcp_tool_call("call-image-2".into(), invocation, true);
|
||||
let extra_cell = cell
|
||||
.complete(Duration::from_millis(25), Ok(result))
|
||||
.expect("expected image cell");
|
||||
|
||||
let rendered = render_lines(&extra_cell.display_lines(80));
|
||||
assert_eq!(rendered, vec!["tool result (image output)"]);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn completed_mcp_tool_call_error_snapshot() {
|
||||
let invocation = McpInvocation {
|
||||
|
||||
Reference in New Issue
Block a user