Fix: Render MCP image outputs regardless of ordering (#9815)

## What? - Render an MCP image output cell whenever a decodable image block exists in `CallToolResult.content` (including text-before-image or malformed image before valid image). ## Why? - Tool results that include caption text before the image currently drop the image output cell. - A malformed image block can also suppress later valid image output. ## How? - Iterate `content` and return the first successfully decoded image instead of only checking the first block. - Add unit tests that cover text-before-image ordering and invalid-image-before-valid. ## Before ```rust let image = match result { Ok(mcp_types::CallToolResult { content, .. }) => { if let Some(mcp_types::ContentBlock::ImageContent(image)) = content.first() { // decode image (fails -> None) } else { None } } _ => None, }?; ``` ## After ```rust let image = result .as_ref() .ok()? .content .iter() .find_map(decode_mcp_image)?; ``` ## Risk / Impact - Low: only affects image cell creation for MCP tool results; no change for non-image outputs. ## Tests - [x] `just fmt` - [x] `cargo test -p codex-tui` - [x] Rerun after branch update (2026-01-27): `just fmt`, `cargo test -p codex-tui` Manual testing # Manual testing: MCP image tool result rendering (Codex TUI) # Build the rmcp stdio test server binary: cd codex-rs cargo build -p codex-rmcp-client --bin test_stdio_server # Register the server as an MCP server (absolute path to the built binary): codex mcp add mcpimg -- /Users/joshka/code/codex-pr-review/codex-rs/target/debug/test_stdio_server # Then in Codex TUI, ask it to call: - mcpimg.image_scenario({"scenario":"image_only"}) - mcpimg.image_scenario({"scenario":"text_then_image","caption":"Here is the image:"}) - mcpimg.image_scenario({"scenario":"invalid_base64_then_image"}) - mcpimg.image_scenario({"scenario":"invalid_image_bytes_then_image"}) - mcpimg.image_scenario({"scenario":"multiple_valid_images"}) - mcpimg.image_scenario({"scenario":"image_then_text","caption":"Here is the image:"}) - mcpimg.image_scenario({"scenario":"text_only","caption":"Here is the image:"}) # Expected: # - You should see an extra history cell: "tool result (image output)" when the # tool result contains at least one decodable image block (even if earlier # blocks are text or invalid images). Fixes #9814 --------- Co-authored-by: Josh McKinney <joshka@openai.com>
2026-04-30 19:32:04 +03:00 · 2026-01-28 08:14:08 +11:00
parent 28051d18c6
commit 337643b00a
2 changed files with 296 additions and 42 deletions
--- a/codex-rs/rmcp-client/src/bin/test_stdio_server.rs
+++ b/codex-rs/rmcp-client/src/bin/test_stdio_server.rs
@@ -35,12 +35,19 @@ struct TestToolServer {

 const MEMO_URI: &str = "memo://codex/example-note";
 const MEMO_CONTENT: &str = "This is a sample MCP resource served by the rmcp test server.";
+const SMALL_PNG_BASE64: &str = "iVBORw0KGgoAAAANSUhEUgAAAAEAAAABCAYAAAAfFcSJAAAADUlEQVR4nGP4z8DwHwAFAAH/iZk9HQAAAABJRU5ErkJggg==";
+
 pub fn stdio() -> (tokio::io::Stdin, tokio::io::Stdout) {
    (tokio::io::stdin(), tokio::io::stdout())
 }
+
 impl TestToolServer {
    fn new() -> Self {
-        let tools = vec![Self::echo_tool(), Self::image_tool()];
+        let tools = vec![
+            Self::echo_tool(),
+            Self::image_tool(),
+            Self::image_scenario_tool(),
+        ];
        let resources = vec![Self::memo_resource()];
        let resource_templates = vec![Self::memo_template()];
        Self {
@@ -86,6 +93,61 @@ impl TestToolServer {
        )
    }

+    /// Tool intended for manual testing of Codex TUI rendering for MCP image tool results.
+    ///
+    /// This exists to exercise edge cases where a `CallToolResult.content` includes image blocks
+    /// that aren't the first item (or includes invalid image blocks before a valid image).
+    ///
+    /// Manual testing approach (Codex TUI):
+    /// - Build this binary: `cargo build -p codex-rmcp-client --bin test_stdio_server`
+    /// - Register it:
+    ///   - `codex mcp add mcpimg -- /abs/path/to/test_stdio_server`
+    /// - Then in Codex TUI, ask it to call:
+    ///   - `mcpimg.image_scenario({"scenario":"image_only"})`
+    ///   - `mcpimg.image_scenario({"scenario":"text_then_image","caption":"Here is the image:"})`
+    ///   - `mcpimg.image_scenario({"scenario":"invalid_base64_then_image"})`
+    ///   - `mcpimg.image_scenario({"scenario":"invalid_image_bytes_then_image"})`
+    ///   - `mcpimg.image_scenario({"scenario":"multiple_valid_images"})`
+    ///   - `mcpimg.image_scenario({"scenario":"image_then_text","caption":"Here is the image:"})`
+    ///   - `mcpimg.image_scenario({"scenario":"text_only","caption":"Here is the image:"})`
+    /// - You should see an extra history cell: `tool result (image output)`.
+    fn image_scenario_tool() -> Tool {
+        #[expect(clippy::expect_used)]
+        let schema: JsonObject = serde_json::from_value(serde_json::json!({
+            "type": "object",
+            "properties": {
+                "scenario": {
+                    "type": "string",
+                    "enum": [
+                        "image_only",
+                        "text_then_image",
+                        "invalid_base64_then_image",
+                        "invalid_image_bytes_then_image",
+                        "multiple_valid_images",
+                        "image_then_text",
+                        "text_only"
+                    ]
+                },
+                "caption": { "type": "string" },
+                "data_url": {
+                    "type": "string",
+                    "description": "Optional data URL like data:image/png;base64,AAAA...; if omitted, uses a built-in tiny PNG."
+                }
+            },
+            "required": ["scenario"],
+            "additionalProperties": false
+        }))
+        .expect("image_scenario tool schema should deserialize");
+
+        Tool::new(
+            Cow::Borrowed("image_scenario"),
+            Cow::Borrowed(
+                "Return content blocks for manual testing of MCP image rendering scenarios.",
+            ),
+            Arc::new(schema),
+        )
+    }
+
    fn memo_resource() -> Resource {
        let raw = RawResource {
            uri: MEMO_URI.to_string(),
@@ -125,6 +187,32 @@ struct EchoArgs {
    env_var: Option<String>,
 }

+#[derive(Deserialize, Debug)]
+#[serde(rename_all = "snake_case")]
+/// Scenarios for `image_scenario`, intended to exercise Codex TUI handling of MCP image outputs.
+///
+/// The key behavior under test is that the TUI should render an image output cell if *any*
+/// decodable image block exists in the tool result content, even if the first block is text or an
+/// invalid image.
+enum ImageScenario {
+    ImageOnly,
+    TextThenImage,
+    InvalidBase64ThenImage,
+    InvalidImageBytesThenImage,
+    MultipleValidImages,
+    ImageThenText,
+    TextOnly,
+}
+
+#[derive(Deserialize, Debug)]
+struct ImageScenarioArgs {
+    scenario: ImageScenario,
+    #[serde(default)]
+    caption: Option<String>,
+    #[serde(default)]
+    data_url: Option<String>,
+}
+
 impl ServerHandler for TestToolServer {
    fn get_info(&self) -> ServerInfo {
        ServerInfo {
@@ -244,14 +332,6 @@ impl ServerHandler for TestToolServer {
                    )
                })?;

-                fn parse_data_url(url: &str) -> Option<(String, String)> {
-                    let rest = url.strip_prefix("data:")?;
-                    let (mime_and_opts, data) = rest.split_once(',')?;
-                    let (mime, _opts) =
-                        mime_and_opts.split_once(';').unwrap_or((mime_and_opts, ""));
-                    Some((mime.to_string(), data.to_string()))
-                }
-
                let (mime_type, data_b64) = parse_data_url(&data_url).ok_or_else(|| {
                    McpError::invalid_params(
                        format!("invalid data URL for image tool: {data_url}"),
@@ -263,6 +343,10 @@ impl ServerHandler for TestToolServer {
                    data_b64, mime_type,
                )]))
            }
+            "image_scenario" => {
+                let args = Self::parse_call_args::<ImageScenarioArgs>(&request, "image_scenario")?;
+                Self::image_scenario_result(args)
+            }
            other => Err(McpError::invalid_params(
                format!("unknown tool: {other}"),
                None,
@@ -271,6 +355,89 @@ impl ServerHandler for TestToolServer {
    }
 }

+impl TestToolServer {
+    fn parse_call_args<T: for<'de> Deserialize<'de>>(
+        request: &CallToolRequestParam,
+        tool_name: &'static str,
+    ) -> Result<T, McpError> {
+        match request.arguments.as_ref() {
+            Some(arguments) => serde_json::from_value(serde_json::Value::Object(
+                arguments.clone().into_iter().collect(),
+            ))
+            .map_err(|err| McpError::invalid_params(err.to_string(), None)),
+            None => Err(McpError::invalid_params(
+                format!("missing arguments for {tool_name} tool"),
+                None,
+            )),
+        }
+    }
+
+    fn image_scenario_result(args: ImageScenarioArgs) -> Result<CallToolResult, McpError> {
+        let (mime_type, valid_data_b64) = if let Some(data_url) = &args.data_url {
+            parse_data_url(data_url).ok_or_else(|| {
+                McpError::invalid_params(
+                    format!("invalid data_url for image_scenario tool: {data_url}"),
+                    None,
+                )
+            })?
+        } else {
+            ("image/png".to_string(), SMALL_PNG_BASE64.to_string())
+        };
+
+        let caption = args
+            .caption
+            .unwrap_or_else(|| "Here is the image:".to_string());
+
+        let mut content = Vec::new();
+        match args.scenario {
+            ImageScenario::ImageOnly => {
+                content.push(rmcp::model::Content::image(valid_data_b64, mime_type));
+            }
+            ImageScenario::TextThenImage => {
+                content.push(rmcp::model::Content::text(caption));
+                content.push(rmcp::model::Content::image(valid_data_b64, mime_type));
+            }
+            ImageScenario::InvalidBase64ThenImage => {
+                content.push(rmcp::model::Content::image(
+                    "not-base64".to_string(),
+                    "image/png".to_string(),
+                ));
+                content.push(rmcp::model::Content::image(valid_data_b64, mime_type));
+            }
+            ImageScenario::InvalidImageBytesThenImage => {
+                content.push(rmcp::model::Content::image(
+                    "bm90IGFuIGltYWdl".to_string(),
+                    "image/png".to_string(),
+                ));
+                content.push(rmcp::model::Content::image(valid_data_b64, mime_type));
+            }
+            ImageScenario::MultipleValidImages => {
+                content.push(rmcp::model::Content::image(
+                    valid_data_b64.clone(),
+                    mime_type.clone(),
+                ));
+                content.push(rmcp::model::Content::image(valid_data_b64, mime_type));
+            }
+            ImageScenario::ImageThenText => {
+                content.push(rmcp::model::Content::image(valid_data_b64, mime_type));
+                content.push(rmcp::model::Content::text(caption));
+            }
+            ImageScenario::TextOnly => {
+                content.push(rmcp::model::Content::text(caption));
+            }
+        }
+
+        Ok(CallToolResult::success(content))
+    }
+}
+
+fn parse_data_url(url: &str) -> Option<(String, String)> {
+    let rest = url.strip_prefix("data:")?;
+    let (mime_and_opts, data) = rest.split_once(',')?;
+    let (mime, _opts) = mime_and_opts.split_once(';').unwrap_or((mime_and_opts, ""));
+    Some((mime.to_string(), data.to_string()))
+}
+
 #[tokio::main]
 async fn main() -> Result<(), Box<dyn std::error::Error>> {
    eprintln!("starting rmcp test server");