Add MCP tool wall time to model output (#17406)

Include MCP wall time in the output so the model is aware of how long it's calls are taking.
2026-05-06 06:12:59 +03:00 · 2026-04-12 18:26:15 -07:00
parent 68a1d82a41
commit 7c1e41c8b6
6 changed files with 286 additions and 32 deletions
--- a/codex-rs/app-server/tests/suite/v2/mcp_server_elicitation.rs
+++ b/codex-rs/app-server/tests/suite/v2/mcp_server_elicitation.rs
@@ -32,6 +32,7 @@ use codex_app_server_protocol::TurnStartResponse;
 use codex_app_server_protocol::TurnStatus;
 use codex_app_server_protocol::UserInput as V2UserInput;
 use codex_config::types::AuthCredentialsStoreMode;
+use core_test_support::assert_regex_match;
 use core_test_support::responses;
 use pretty_assertions::assert_eq;
 use rmcp::handler::server::ServerHandler;
@@ -274,8 +275,15 @@ async fn mcp_server_elicitation_round_trip() -> Result<()> {
        .get("output")
        .and_then(Value::as_str)
        .expect("function_call_output output should be a JSON string");
+    let payload = assert_regex_match(
+        r#"(?s)^Wall time: [0-9]+(?:\.[0-9]+)? seconds\nOutput:\n(.*)$"#,
+        output,
+    )
+    .get(1)
+    .expect("wall-time wrapped output should include payload")
+    .as_str();
    assert_eq!(
-        serde_json::from_str::<Value>(output)?,
+        serde_json::from_str::<Value>(payload)?,
        json!([{
            "type": "text",
            "text": "accepted"
--- a/codex-rs/core/src/tools/context.rs
+++ b/codex-rs/core/src/tools/context.rs
@@ -118,6 +118,63 @@ impl ToolOutput for CallToolResult {
    }
 }

+#[derive(Clone, Debug)]
+pub struct McpToolOutput {
+    pub result: CallToolResult,
+    pub wall_time: Duration,
+}
+
+impl ToolOutput for McpToolOutput {
+    fn log_preview(&self) -> String {
+        let payload = self.response_payload();
+        let preview = payload.body.to_text().unwrap_or_else(|| {
+            serde_json::to_string(&self.result.content)
+                .unwrap_or_else(|err| format!("failed to serialize mcp result: {err}"))
+        });
+        telemetry_preview(&preview)
+    }
+
+    fn success_for_logging(&self) -> bool {
+        self.result.success()
+    }
+
+    fn to_response_item(&self, call_id: &str, _payload: &ToolPayload) -> ResponseInputItem {
+        ResponseInputItem::FunctionCallOutput {
+            call_id: call_id.to_string(),
+            output: self.response_payload(),
+        }
+    }
+
+    fn code_mode_result(&self, _payload: &ToolPayload) -> JsonValue {
+        serde_json::to_value(&self.result).unwrap_or_else(|err| {
+            JsonValue::String(format!("failed to serialize mcp result: {err}"))
+        })
+    }
+}
+
+impl McpToolOutput {
+    fn response_payload(&self) -> FunctionCallOutputPayload {
+        let mut payload = self.result.as_function_call_output_payload();
+        let wall_time_seconds = self.wall_time.as_secs_f64();
+        let header = format!("Wall time: {wall_time_seconds:.4} seconds\nOutput:");
+
+        match &mut payload.body {
+            FunctionCallOutputBody::Text(text) => {
+                if text.is_empty() {
+                    *text = header;
+                } else {
+                    *text = format!("{header}\n{text}");
+                }
+            }
+            FunctionCallOutputBody::ContentItems(items) => {
+                items.insert(0, FunctionCallOutputContentItem::InputText { text: header });
+            }
+        }
+
+        payload
+    }
+}
+
 #[derive(Clone)]
 pub struct ToolSearchOutput {
    pub tools: Vec<ToolSearchOutputTool>,
--- a/codex-rs/core/src/tools/context_tests.rs
+++ b/codex-rs/core/src/tools/context_tests.rs
@@ -85,6 +85,145 @@ fn mcp_code_mode_result_serializes_full_call_tool_result() {
    );
 }

+#[test]
+fn mcp_tool_output_response_item_includes_wall_time() {
+    let output = McpToolOutput {
+        result: CallToolResult {
+            content: vec![serde_json::json!({
+                "type": "text",
+                "text": "done",
+            })],
+            structured_content: None,
+            is_error: Some(false),
+            meta: None,
+        },
+        wall_time: std::time::Duration::from_millis(1250),
+    };
+
+    let response = output.to_response_item(
+        "mcp-call-1",
+        &ToolPayload::Mcp {
+            server: "server".to_string(),
+            tool: "tool".to_string(),
+            raw_arguments: "{}".to_string(),
+        },
+    );
+
+    match response {
+        ResponseInputItem::FunctionCallOutput { call_id, output } => {
+            assert_eq!(call_id, "mcp-call-1");
+            assert_eq!(output.success, Some(true));
+            let Some(text) = output.body.to_text() else {
+                panic!("MCP output should serialize as text");
+            };
+            let Some(payload) = text.strip_prefix("Wall time: 1.2500 seconds\nOutput:\n") else {
+                panic!("MCP output should include wall-time header: {text}");
+            };
+            let parsed: serde_json::Value = serde_json::from_str(payload).unwrap_or_else(|err| {
+                panic!("MCP output should serialize JSON content: {err}");
+            });
+            assert_eq!(
+                parsed,
+                json!([{
+                    "type": "text",
+                    "text": "done",
+                }])
+            );
+        }
+        other => panic!("expected FunctionCallOutput, got {other:?}"),
+    }
+}
+
+#[test]
+fn mcp_tool_output_response_item_preserves_content_items() {
+    let image_url = "data:image/png;base64,AAA";
+    let output = McpToolOutput {
+        result: CallToolResult {
+            content: vec![serde_json::json!({
+                "type": "image",
+                "mimeType": "image/png",
+                "data": "AAA",
+            })],
+            structured_content: None,
+            is_error: Some(false),
+            meta: None,
+        },
+        wall_time: std::time::Duration::from_millis(500),
+    };
+
+    let response = output.to_response_item(
+        "mcp-call-2",
+        &ToolPayload::Mcp {
+            server: "server".to_string(),
+            tool: "tool".to_string(),
+            raw_arguments: "{}".to_string(),
+        },
+    );
+
+    match response {
+        ResponseInputItem::FunctionCallOutput { output, .. } => {
+            assert_eq!(
+                output.content_items(),
+                Some(
+                    vec![
+                        FunctionCallOutputContentItem::InputText {
+                            text: "Wall time: 0.5000 seconds\nOutput:".to_string(),
+                        },
+                        FunctionCallOutputContentItem::InputImage {
+                            image_url: image_url.to_string(),
+                            detail: None,
+                        },
+                    ]
+                    .as_slice()
+                )
+            );
+            assert_eq!(
+                output.body.to_text().as_deref(),
+                Some("Wall time: 0.5000 seconds\nOutput:")
+            );
+        }
+        other => panic!("expected FunctionCallOutput, got {other:?}"),
+    }
+}
+
+#[test]
+fn mcp_tool_output_code_mode_result_stays_raw_call_tool_result() {
+    let output = McpToolOutput {
+        result: CallToolResult {
+            content: vec![serde_json::json!({
+                "type": "text",
+                "text": "ignored",
+            })],
+            structured_content: Some(serde_json::json!({
+                "content": "done",
+            })),
+            is_error: Some(false),
+            meta: None,
+        },
+        wall_time: std::time::Duration::from_millis(1250),
+    };
+
+    let result = output.code_mode_result(&ToolPayload::Mcp {
+        server: "server".to_string(),
+        tool: "tool".to_string(),
+        raw_arguments: "{}".to_string(),
+    });
+
+    assert_eq!(
+        result,
+        serde_json::json!({
+            "content": [{
+                "type": "text",
+                "text": "ignored",
+            }],
+            "structuredContent": {
+                "content": "done",
+            },
+            "isError": false,
+        })
+    );
+}
+
 #[test]
 fn custom_tool_calls_can_derive_text_from_content_items() {
    let payload = ToolPayload::Custom {
--- a/codex-rs/core/src/tools/handlers/mcp.rs
+++ b/codex-rs/core/src/tools/handlers/mcp.rs
@@ -1,16 +1,17 @@
 use std::sync::Arc;
+use std::time::Instant;

 use crate::function_tool::FunctionCallError;
 use crate::mcp_tool_call::handle_mcp_tool_call;
+use crate::tools::context::McpToolOutput;
 use crate::tools::context::ToolInvocation;
 use crate::tools::context::ToolPayload;
 use crate::tools::registry::ToolHandler;
 use crate::tools::registry::ToolKind;
-use codex_protocol::mcp::CallToolResult;

 pub struct McpHandler;
 impl ToolHandler for McpHandler {
-    type Output = CallToolResult;
+    type Output = McpToolOutput;

    fn kind(&self) -> ToolKind {
        ToolKind::Mcp
@@ -41,7 +42,8 @@ impl ToolHandler for McpHandler {
        let (server, tool, raw_arguments) = payload;
        let arguments_str = raw_arguments;

-        let output = handle_mcp_tool_call(
+        let started = Instant::now();
+        let result = handle_mcp_tool_call(
            Arc::clone(&session),
            &turn,
            call_id.clone(),
@@ -51,6 +53,9 @@ impl ToolHandler for McpHandler {
        )
        .await;

-        Ok(output)
+        Ok(McpToolOutput {
+            result,
+            wall_time: started.elapsed(),
+        })
    }
 }
--- a/codex-rs/core/tests/suite/rmcp_client.rs
+++ b/codex-rs/core/tests/suite/rmcp_client.rs
@@ -30,6 +30,7 @@ use codex_protocol::protocol::Op;
 use codex_protocol::protocol::SandboxPolicy;
 use codex_protocol::user_input::UserInput;
 use codex_utils_cargo_bin::cargo_bin;
+use core_test_support::assert_regex_match;
 use core_test_support::responses;
 use core_test_support::responses::mount_models_once;
 use core_test_support::responses::mount_sse_once;
@@ -51,6 +52,29 @@ use tokio::time::sleep;

 static OPENAI_PNG: &str = "data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAAD0AAAA9CAYAAAAeYmHpAAAE6klEQVR4Aeyau44UVxCGx1fZsmRLlm3Zoe0XcGQ5cUiCCIgJeS9CHgAhMkISQnIuGQgJEkBcxLW+nqnZ6uqqc+nuWRC7q/P3qetf9e+MtOwyX25O4Nep6JPyop++0qev9HrfgZ+F6r2DuB/vHOrt/UIkqdDHYvujOW6fO7h/CNEI+a5jc+pBR8uy0jVFsziYu5HtfSUk+Io34q921hLNctFSX0gwww+S8wce8K1LfCU+cYW4888aov8NxqvQILUPPReLOrm6zyLxa4i+6VZuFbJo8d1MOHZm+7VUtB/aIvhPWc/3SWg49JcwFLlHxuXKjtyloo+YNhuW3VS+WPBuUEMvCFKjEDVgFBQHXrnazpqiSxNZCkQ1kYiozsbm9Oz7l4i2Il7vGccGNWAc3XosDrZe/9P3ZnMmzHNEQw4smf8RQ87XEAMsC7Az0Au+dgXerfH4+sHvEc0SYGic8WBBUGqFH2gN7yDrazy7m2pbRTeRmU3+MjZmr1h6LJgPbGy23SI6GlYT0brQ71IY8Us4PNQCm+zepSbaD2BY9xCaAsD9IIj/IzFmKMSdHHonwdZATbTnYREf6/VZGER98N9yCWIvXQwXDoDdhZJoT8jwLnJXDB9w4Sb3e6nK5ndzlkTLnP3JBu4LKkbrYrU69gCVceV0JvpyuW1xlsUVngzhwMetn/XamtTORF9IO5YnWNiyeF9zCAfqR3fUW+vZZKLtgP+ts8BmQRBREAdRDhH3o8QuRh/YucNFz2BEjxbRN6LGzphfKmvP6v6QhqIQyZ8XNJ0W0X83MR1PEcJBNO2KC2Z1TW/v244scp9FwRViZxIOBF0Lctk7ZVSavdLvRlV1hz/ysUi9sr8CIcB3nvWBwA93ykTz18eAYxQ6N/K2DkPA1lv3iXCwmDUT7YkjIby9siXueIJj9H+pzSqJ9oIuJWTUgSSt4WO7o/9GGg0viR4VinNRUDoIj34xoCd6pxD3aK3zfdbnx5v1J3ZNNEJsE0sBG7N27ReDrJc4sFxz7dI/ZAbOmmiKvHBitQXpAdR6+F7v+/ol/tOouUV01EeMZQF2BoQDn6dP4XNr+j9GZEtEK1/L8pFw7bd3a53tsTa7WD+054jOFmPg1XBKPQgnqFfmFcy32ZRvjmiIIQTYFvyDxQ8nH8WIwwGwlyDjDznnilYyFr6njrlZwsKkBpO59A7OwgdzPEWRm+G+oeb7IfyNuzjEEVLrOVxJsxvxwF8kmCM6I2QYmJunz4u4TrADpfl7mlbRTWQ7VmrBzh3+C9f6Grc3YoGN9dg/SXFthpRsT6vobfXRs2VBlgBHXVMLHjDNbIZv1sZ9+X3hB09cXdH1JKViyG0+W9bWZDa/r2f9zAFR71sTzGpMSWz2iI4YssWjWo3REy1MDGjdwe5e0dFSiAC1JakBvu4/CUS8Eh6dqHdU0Or0ioY3W5ClSqDXAy7/6SRfgw8vt4I+tbvvNtFT2kVDhY5+IGb1rCqYaXNF08vSALsXCPmt0kQNqJT1p5eI1mkIV/BxCY1z85lOzeFbPBQHURkkPTlwTYK9gTVE25l84IbFFN+YJDHjdpn0gq6mrHht0dkcjbM4UL9283O5p77GN+SPW/QwVB4IUYg7Or+Kp7naR6qktP98LNF2UxWo9yObPIT9KYg+hK4i56no4rfnM0qeyFf6AwAAAP//trwR3wAAAAZJREFUAwBZ0sR75itw5gAAAABJRU5ErkJggg==";

+fn assert_wall_time_line(line: &str) {
+    assert_regex_match(r"^Wall time: [0-9]+(?:\.[0-9]+)? seconds$", line);
+}
+
+fn split_wall_time_wrapped_output(output: &str) -> &str {
+    let Some((wall_time, rest)) = output.split_once('\n') else {
+        panic!("wall-time output should contain an Output section: {output}");
+    };
+    assert_wall_time_line(wall_time);
+    let Some(output) = rest.strip_prefix("Output:\n") else {
+        panic!("wall-time output should contain Output marker: {output}");
+    };
+    output
+}
+
+fn assert_wall_time_header(output: &str) {
+    let Some((wall_time, marker)) = output.split_once('\n') else {
+        panic!("wall-time header should contain an Output marker: {output}");
+    };
+    assert_wall_time_line(wall_time);
+    assert_eq!(marker, "Output:");
+}
+
 #[tokio::test(flavor = "multi_thread", worker_threads = 1)]
 #[serial(mcp_test_value)]
 async fn stdio_server_round_trip() -> anyhow::Result<()> {
@@ -71,7 +95,7 @@ async fn stdio_server_round_trip() -> anyhow::Result<()> {
        ]),
    )
    .await;
-    mount_sse_once(
+    let final_mock = mount_sse_once(
        &server,
        responses::sse(vec![
            responses::ev_assistant_message("msg-1", "rmcp echo tool completed successfully."),
@@ -190,6 +214,17 @@ async fn stdio_server_round_trip() -> anyhow::Result<()> {

    wait_for_event(&fixture.codex, |ev| matches!(ev, EventMsg::TurnComplete(_))).await;

+    let output_item = final_mock.single_request().function_call_output(call_id);
+    let output_text = output_item
+        .get("output")
+        .and_then(Value::as_str)
+        .expect("function_call_output output should be a string");
+    let wrapped_payload = split_wall_time_wrapped_output(output_text);
+    let output_json: Value = serde_json::from_str(wrapped_payload)
+        .expect("wrapped MCP output should preserve structured JSON");
+    assert_eq!(output_json["echo"], "ECHOING: ping");
+    assert_eq!(output_json["env"], expected_env_value);
+
    server.verify().await;

    Ok(())
@@ -362,15 +397,22 @@ async fn stdio_image_responses_round_trip() -> anyhow::Result<()> {
    wait_for_event(&fixture.codex, |ev| matches!(ev, EventMsg::TurnComplete(_))).await;

    let output_item = final_mock.single_request().function_call_output(call_id);
+    assert_eq!(output_item["type"], "function_call_output");
+    assert_eq!(output_item["call_id"], call_id);
+    let output = output_item["output"]
+        .as_array()
+        .expect("image MCP output should be content items");
+    assert_eq!(output.len(), 2);
+    assert_wall_time_header(
+        output[0]["text"]
+            .as_str()
+            .expect("first MCP image output item should be wall-time text"),
+    );
    assert_eq!(
-        output_item,
+        output[1],
        json!({
-            "type": "function_call_output",
-            "call_id": call_id,
-            "output": [{
-                "type": "input_image",
-                "image_url": OPENAI_PNG
-            }]
+            "type": "input_image",
+            "image_url": OPENAI_PNG
        })
    );
    server.verify().await;
@@ -533,7 +575,8 @@ async fn stdio_image_responses_are_sanitized_for_text_only_model() -> anyhow::Re
        .get("output")
        .and_then(Value::as_str)
        .expect("function_call_output output should be a JSON string");
-    let output_json: Value = serde_json::from_str(output_text)
+    let wrapped_payload = split_wall_time_wrapped_output(output_text);
+    let output_json: Value = serde_json::from_str(wrapped_payload)
        .expect("function_call_output output should be valid JSON");
    assert_eq!(
        output_json,
--- a/codex-rs/core/tests/suite/truncation.rs
+++ b/codex-rs/core/tests/suite/truncation.rs
@@ -28,6 +28,14 @@ use serde_json::json;
 use std::collections::HashMap;
 use std::time::Duration;

+fn assert_wall_time_header(output: &str) {
+    let (wall_time, marker) = output
+        .split_once('\n')
+        .expect("wall-time header should contain an Output marker");
+    assert_regex_match(r"^Wall time: [0-9]+(?:\.[0-9]+)? seconds$", wall_time);
+    assert_eq!(marker, "Output:");
+}
+
 // Verifies that a standard tool call (shell_command) exceeding the model formatting
 // limits is truncated before being sent back to the model.
 #[tokio::test(flavor = "multi_thread", worker_threads = 2)]
@@ -400,9 +408,9 @@ async fn mcp_tool_call_output_exceeds_limit_truncated_for_model() -> Result<()>
        "MCP output should not include line-based truncation header: {output}"
    );

-    let truncated_pattern = r#"(?s)^\{"echo":\s*"ECHOING: long-message-with-newlines-.*tokens truncated.*long-message-with-newlines-.*$"#;
+    let truncated_pattern = r#"(?s)^Wall time: [0-9]+(?:\.[0-9]+)? seconds\nOutput:\n\{"echo":\s*"ECHOING: long-message-with-newlines-.*tokens truncated.*long-message-with-newlines-.*$"#;
    assert_regex_match(truncated_pattern, &output);
-    assert!(output.len() < 2500, "{}", output.len());
+    assert!(output.len() < 2600, "{}", output.len());

    Ok(())
 }
@@ -502,13 +510,18 @@ async fn mcp_image_output_preserves_image_and_no_text_summary() -> Result<()> {
    // Wait for completion to ensure the outbound request is captured.
    wait_for_event(&fixture.codex, |ev| matches!(ev, EventMsg::TurnComplete(_))).await;
    let output_item = final_mock.single_request().function_call_output(call_id);
-    // Expect exactly one array element: the image item; and no trailing summary text.
+    // Expect exactly the wall-time text and image item; no trailing truncation summary.
    let output = output_item.get("output").expect("output");
    assert!(output.is_array(), "expected array output");
    let arr = output.as_array().unwrap();
-    assert_eq!(arr.len(), 1, "no truncation summary should be appended");
+    assert_eq!(arr.len(), 2, "no truncation summary should be appended");
+    assert_wall_time_header(
+        arr[0]["text"]
+            .as_str()
+            .expect("first MCP image output item should be wall-time text"),
+    );
    assert_eq!(
-        arr[0],
+        arr[1],
        json!({"type": "input_image", "image_url": openai_png})
    );

@@ -758,22 +771,11 @@ async fn mcp_tool_call_output_not_truncated_with_custom_limit() -> Result<()> {
        .function_call_output_text(call_id)
        .context("function_call_output present for rmcp call")?;

-    let parsed: Value = serde_json::from_str(&output)?;
    assert_eq!(
        output.len(),
-        80031,
-        "parsed MCP output should retain its serialized length"
+        80065,
+        "MCP output should retain its serialized length plus wall-time header"
    );
-    let expected_echo = format!("ECHOING: {large_msg}");
-    let echo_str = parsed["echo"]
-        .as_str()
-        .context("echo field should be a string in rmcp echo output")?;
-    assert_eq!(
-        echo_str.len(),
-        expected_echo.len(),
-        "echo length should match"
-    );
-    assert_eq!(echo_str, expected_echo);
    assert!(
        !output.contains("truncated"),
        "output should not include truncation markers when limit is raised: {output}"