mirror of
https://github.com/openai/codex.git
synced 2026-04-30 19:32:04 +03:00
Fix: Render MCP image outputs regardless of ordering (#9815)
## What?
- Render an MCP image output cell whenever a decodable image block
exists in `CallToolResult.content` (including text-before-image or
malformed image before valid image).
## Why?
- Tool results that include caption text before the image currently drop
the image output cell.
- A malformed image block can also suppress later valid image output.
## How?
- Iterate `content` and return the first successfully decoded image
instead of only checking the first block.
- Add unit tests that cover text-before-image ordering and
invalid-image-before-valid.
## Before
```rust
let image = match result {
Ok(mcp_types::CallToolResult { content, .. }) => {
if let Some(mcp_types::ContentBlock::ImageContent(image)) = content.first() {
// decode image (fails -> None)
} else {
None
}
}
_ => None,
}?;
```
## After
```rust
let image = result
.as_ref()
.ok()?
.content
.iter()
.find_map(decode_mcp_image)?;
```
## Risk / Impact
- Low: only affects image cell creation for MCP tool results; no change
for non-image outputs.
## Tests
- [x] `just fmt`
- [x] `cargo test -p codex-tui`
- [x] Rerun after branch update (2026-01-27): `just fmt`, `cargo test -p
codex-tui`
Manual testing
# Manual testing: MCP image tool result rendering (Codex TUI)
# Build the rmcp stdio test server binary:
cd codex-rs
cargo build -p codex-rmcp-client --bin test_stdio_server
# Register the server as an MCP server (absolute path to the built binary):
codex mcp add mcpimg -- /Users/joshka/code/codex-pr-review/codex-rs/target/debug/test_stdio_server
# Then in Codex TUI, ask it to call:
- mcpimg.image_scenario({"scenario":"image_only"})
- mcpimg.image_scenario({"scenario":"text_then_image","caption":"Here is the image:"})
- mcpimg.image_scenario({"scenario":"invalid_base64_then_image"})
- mcpimg.image_scenario({"scenario":"invalid_image_bytes_then_image"})
- mcpimg.image_scenario({"scenario":"multiple_valid_images"})
- mcpimg.image_scenario({"scenario":"image_then_text","caption":"Here is the image:"})
- mcpimg.image_scenario({"scenario":"text_only","caption":"Here is the image:"})
# Expected:
# - You should see an extra history cell: "tool result (image output)" when the
# tool result contains at least one decodable image block (even if earlier
# blocks are text or invalid images).
Fixes #9814
---------
Co-authored-by: Josh McKinney <joshka@openai.com>
This commit is contained in:
@@ -35,12 +35,19 @@ struct TestToolServer {
|
||||
|
||||
/// URI assigned to the sample memo resource built by `memo_resource`.
const MEMO_URI: &str = "memo://codex/example-note";
|
||||
/// Sample text for the memo resource.
/// NOTE(review): presumably returned as the resource body — the reader is not visible here.
const MEMO_CONTENT: &str = "This is a sample MCP resource served by the rmcp test server.";
|
||||
/// Base64 payload of the built-in tiny PNG that `image_scenario` sends with the
/// `image/png` MIME type when the caller does not supply a `data_url`.
const SMALL_PNG_BASE64: &str = "iVBORw0KGgoAAAANSUhEUgAAAAEAAAABCAYAAAAfFcSJAAAADUlEQVR4nGP4z8DwHwAFAAH/iZk9HQAAAABJRU5ErkJggg==";
|
||||
|
||||
/// Returns tokio's stdin and stdout handles as an `(input, output)` pair.
pub fn stdio() -> (tokio::io::Stdin, tokio::io::Stdout) {
    let input = tokio::io::stdin();
    let output = tokio::io::stdout();
    (input, output)
}
|
||||
|
||||
impl TestToolServer {
|
||||
fn new() -> Self {
|
||||
let tools = vec![Self::echo_tool(), Self::image_tool()];
|
||||
let tools = vec![
|
||||
Self::echo_tool(),
|
||||
Self::image_tool(),
|
||||
Self::image_scenario_tool(),
|
||||
];
|
||||
let resources = vec![Self::memo_resource()];
|
||||
let resource_templates = vec![Self::memo_template()];
|
||||
Self {
|
||||
@@ -86,6 +93,61 @@ impl TestToolServer {
|
||||
)
|
||||
}
|
||||
|
||||
/// Tool intended for manual testing of Codex TUI rendering for MCP image tool results.
|
||||
///
|
||||
/// This exists to exercise edge cases where a `CallToolResult.content` includes image blocks
|
||||
/// that aren't the first item (or includes invalid image blocks before a valid image).
|
||||
///
|
||||
/// Manual testing approach (Codex TUI):
|
||||
/// - Build this binary: `cargo build -p codex-rmcp-client --bin test_stdio_server`
|
||||
/// - Register it:
|
||||
/// - `codex mcp add mcpimg -- /abs/path/to/test_stdio_server`
|
||||
/// - Then in Codex TUI, ask it to call:
|
||||
/// - `mcpimg.image_scenario({"scenario":"image_only"})`
|
||||
/// - `mcpimg.image_scenario({"scenario":"text_then_image","caption":"Here is the image:"})`
|
||||
/// - `mcpimg.image_scenario({"scenario":"invalid_base64_then_image"})`
|
||||
/// - `mcpimg.image_scenario({"scenario":"invalid_image_bytes_then_image"})`
|
||||
/// - `mcpimg.image_scenario({"scenario":"multiple_valid_images"})`
|
||||
/// - `mcpimg.image_scenario({"scenario":"image_then_text","caption":"Here is the image:"})`
|
||||
/// - `mcpimg.image_scenario({"scenario":"text_only","caption":"Here is the image:"})`
|
||||
/// - You should see an extra history cell: `tool result (image output)`.
|
||||
fn image_scenario_tool() -> Tool {
|
||||
#[expect(clippy::expect_used)]
|
||||
let schema: JsonObject = serde_json::from_value(serde_json::json!({
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"scenario": {
|
||||
"type": "string",
|
||||
"enum": [
|
||||
"image_only",
|
||||
"text_then_image",
|
||||
"invalid_base64_then_image",
|
||||
"invalid_image_bytes_then_image",
|
||||
"multiple_valid_images",
|
||||
"image_then_text",
|
||||
"text_only"
|
||||
]
|
||||
},
|
||||
"caption": { "type": "string" },
|
||||
"data_url": {
|
||||
"type": "string",
|
||||
"description": "Optional data URL like data:image/png;base64,AAAA...; if omitted, uses a built-in tiny PNG."
|
||||
}
|
||||
},
|
||||
"required": ["scenario"],
|
||||
"additionalProperties": false
|
||||
}))
|
||||
.expect("image_scenario tool schema should deserialize");
|
||||
|
||||
Tool::new(
|
||||
Cow::Borrowed("image_scenario"),
|
||||
Cow::Borrowed(
|
||||
"Return content blocks for manual testing of MCP image rendering scenarios.",
|
||||
),
|
||||
Arc::new(schema),
|
||||
)
|
||||
}
|
||||
|
||||
fn memo_resource() -> Resource {
|
||||
let raw = RawResource {
|
||||
uri: MEMO_URI.to_string(),
|
||||
@@ -125,6 +187,32 @@ struct EchoArgs {
|
||||
env_var: Option<String>,
|
||||
}
|
||||
|
||||
#[derive(Deserialize, Debug)]
|
||||
#[serde(rename_all = "snake_case")]
|
||||
/// Scenarios for `image_scenario`, intended to exercise Codex TUI handling of MCP image outputs.
|
||||
///
|
||||
/// The key behavior under test is that the TUI should render an image output cell if *any*
|
||||
/// decodable image block exists in the tool result content, even if the first block is text or an
|
||||
/// invalid image.
|
||||
enum ImageScenario {
|
||||
ImageOnly,
|
||||
TextThenImage,
|
||||
InvalidBase64ThenImage,
|
||||
InvalidImageBytesThenImage,
|
||||
MultipleValidImages,
|
||||
ImageThenText,
|
||||
TextOnly,
|
||||
}
|
||||
|
||||
#[derive(Deserialize, Debug)]
|
||||
struct ImageScenarioArgs {
|
||||
scenario: ImageScenario,
|
||||
#[serde(default)]
|
||||
caption: Option<String>,
|
||||
#[serde(default)]
|
||||
data_url: Option<String>,
|
||||
}
|
||||
|
||||
impl ServerHandler for TestToolServer {
|
||||
fn get_info(&self) -> ServerInfo {
|
||||
ServerInfo {
|
||||
@@ -244,14 +332,6 @@ impl ServerHandler for TestToolServer {
|
||||
)
|
||||
})?;
|
||||
|
||||
fn parse_data_url(url: &str) -> Option<(String, String)> {
|
||||
let rest = url.strip_prefix("data:")?;
|
||||
let (mime_and_opts, data) = rest.split_once(',')?;
|
||||
let (mime, _opts) =
|
||||
mime_and_opts.split_once(';').unwrap_or((mime_and_opts, ""));
|
||||
Some((mime.to_string(), data.to_string()))
|
||||
}
|
||||
|
||||
let (mime_type, data_b64) = parse_data_url(&data_url).ok_or_else(|| {
|
||||
McpError::invalid_params(
|
||||
format!("invalid data URL for image tool: {data_url}"),
|
||||
@@ -263,6 +343,10 @@ impl ServerHandler for TestToolServer {
|
||||
data_b64, mime_type,
|
||||
)]))
|
||||
}
|
||||
"image_scenario" => {
|
||||
let args = Self::parse_call_args::<ImageScenarioArgs>(&request, "image_scenario")?;
|
||||
Self::image_scenario_result(args)
|
||||
}
|
||||
other => Err(McpError::invalid_params(
|
||||
format!("unknown tool: {other}"),
|
||||
None,
|
||||
@@ -271,6 +355,89 @@ impl ServerHandler for TestToolServer {
|
||||
}
|
||||
}
|
||||
|
||||
impl TestToolServer {
|
||||
fn parse_call_args<T: for<'de> Deserialize<'de>>(
|
||||
request: &CallToolRequestParam,
|
||||
tool_name: &'static str,
|
||||
) -> Result<T, McpError> {
|
||||
match request.arguments.as_ref() {
|
||||
Some(arguments) => serde_json::from_value(serde_json::Value::Object(
|
||||
arguments.clone().into_iter().collect(),
|
||||
))
|
||||
.map_err(|err| McpError::invalid_params(err.to_string(), None)),
|
||||
None => Err(McpError::invalid_params(
|
||||
format!("missing arguments for {tool_name} tool"),
|
||||
None,
|
||||
)),
|
||||
}
|
||||
}
|
||||
|
||||
fn image_scenario_result(args: ImageScenarioArgs) -> Result<CallToolResult, McpError> {
|
||||
let (mime_type, valid_data_b64) = if let Some(data_url) = &args.data_url {
|
||||
parse_data_url(data_url).ok_or_else(|| {
|
||||
McpError::invalid_params(
|
||||
format!("invalid data_url for image_scenario tool: {data_url}"),
|
||||
None,
|
||||
)
|
||||
})?
|
||||
} else {
|
||||
("image/png".to_string(), SMALL_PNG_BASE64.to_string())
|
||||
};
|
||||
|
||||
let caption = args
|
||||
.caption
|
||||
.unwrap_or_else(|| "Here is the image:".to_string());
|
||||
|
||||
let mut content = Vec::new();
|
||||
match args.scenario {
|
||||
ImageScenario::ImageOnly => {
|
||||
content.push(rmcp::model::Content::image(valid_data_b64, mime_type));
|
||||
}
|
||||
ImageScenario::TextThenImage => {
|
||||
content.push(rmcp::model::Content::text(caption));
|
||||
content.push(rmcp::model::Content::image(valid_data_b64, mime_type));
|
||||
}
|
||||
ImageScenario::InvalidBase64ThenImage => {
|
||||
content.push(rmcp::model::Content::image(
|
||||
"not-base64".to_string(),
|
||||
"image/png".to_string(),
|
||||
));
|
||||
content.push(rmcp::model::Content::image(valid_data_b64, mime_type));
|
||||
}
|
||||
ImageScenario::InvalidImageBytesThenImage => {
|
||||
content.push(rmcp::model::Content::image(
|
||||
"bm90IGFuIGltYWdl".to_string(),
|
||||
"image/png".to_string(),
|
||||
));
|
||||
content.push(rmcp::model::Content::image(valid_data_b64, mime_type));
|
||||
}
|
||||
ImageScenario::MultipleValidImages => {
|
||||
content.push(rmcp::model::Content::image(
|
||||
valid_data_b64.clone(),
|
||||
mime_type.clone(),
|
||||
));
|
||||
content.push(rmcp::model::Content::image(valid_data_b64, mime_type));
|
||||
}
|
||||
ImageScenario::ImageThenText => {
|
||||
content.push(rmcp::model::Content::image(valid_data_b64, mime_type));
|
||||
content.push(rmcp::model::Content::text(caption));
|
||||
}
|
||||
ImageScenario::TextOnly => {
|
||||
content.push(rmcp::model::Content::text(caption));
|
||||
}
|
||||
}
|
||||
|
||||
Ok(CallToolResult::success(content))
|
||||
}
|
||||
}
|
||||
|
||||
/// Splits a `data:` URL into `(mime_type, payload)`.
///
/// The MIME type is the text between `data:` and the first `;` (or the first `,` when no
/// `;` precedes it); any parameters such as `;base64` are discarded. The payload is
/// everything after the first `,`, returned verbatim without decoding.
///
/// Returns `None` when `url` does not start with `data:` or contains no `,`.
fn parse_data_url(url: &str) -> Option<(String, String)> {
    let (header, payload) = url.strip_prefix("data:")?.split_once(',')?;
    let media_type = match header.split_once(';') {
        Some((before_params, _params)) => before_params,
        None => header,
    };
    Some((media_type.to_owned(), payload.to_owned()))
}
|
||||
|
||||
#[tokio::main]
|
||||
async fn main() -> Result<(), Box<dyn std::error::Error>> {
|
||||
eprintln!("starting rmcp test server");
|
||||
|
||||
Reference in New Issue
Block a user