Migrate coverage to shell_command (#7042)

2026-04-30 11:21:34 +03:00 · 2025-11-20 19:44:00 -08:00
parent 830ab4ce20
commit 767b66f407
25 changed files with 284 additions and 262 deletions
--- a/codex-rs/core/tests/suite/truncation.rs
+++ b/codex-rs/core/tests/suite/truncation.rs
@@ -98,7 +98,7 @@ async fn truncate_function_error_trims_respond_to_model() -> Result<()> {
    Ok(())
 }

-// Verifies that a standard tool call (shell) exceeding the model formatting
+// Verifies that a standard tool call (shell_command) exceeding the model formatting
 // limits is truncated before being sent back to the model.
 #[tokio::test(flavor = "multi_thread", worker_threads = 2)]
 async fn tool_call_output_configured_limit_chars_type() -> Result<()> {
@@ -106,7 +106,7 @@ async fn tool_call_output_configured_limit_chars_type() -> Result<()> {

    let server = start_mock_server().await;

-    // Use a model that exposes the generic shell tool.
+    // Use a model that exposes the shell_command tool.
    let mut builder = test_codex().with_model("gpt-5.1").with_config(|config| {
        config.tool_output_token_limit = Some(100_000);
    });
@@ -114,28 +114,22 @@ async fn tool_call_output_configured_limit_chars_type() -> Result<()> {
    let fixture = builder.build(&server).await?;

    let call_id = "shell-too-large";
-    let args = if cfg!(windows) {
-        serde_json::json!({
-            "command": [
-                "powershell",
-                "-Command",
-                "for ($i=1; $i -le 100000; $i++) { Write-Output $i }"
-            ],
-            "timeout_ms": 5_000,
-        })
+    let command = if cfg!(windows) {
+        "for ($i=1; $i -le 100000; $i++) { Write-Output $i }"
    } else {
-        serde_json::json!({
-            "command": ["/bin/sh", "-c", "seq 1 100000"],
-            "timeout_ms": 5_000,
-        })
+        "seq 1 100000"
    };
+    let args = serde_json::json!({
+        "command": command,
+        "timeout_ms": 5_000,
+    });

    // First response: model tells us to run the tool; second: complete the turn.
    mount_sse_once(
        &server,
        sse(vec![
            responses::ev_response_created("resp-1"),
-            responses::ev_function_call(call_id, "shell", &serde_json::to_string(&args)?),
+            responses::ev_function_call(call_id, "shell_command", &serde_json::to_string(&args)?),
            responses::ev_completed("resp-1"),
        ]),
    )
@@ -167,7 +161,10 @@ async fn tool_call_output_configured_limit_chars_type() -> Result<()> {
        "expected truncated shell output to be plain text"
    );

-    assert_eq!(output.len(), 400097, "we should be almost 100k tokens");
+    assert!(
+        (400000..=401000).contains(&output.len()),
+        "we should be almost 100k tokens"
+    );

    assert!(
        !output.contains("tokens truncated"),
@@ -177,7 +174,7 @@ async fn tool_call_output_configured_limit_chars_type() -> Result<()> {
    Ok(())
 }

-// Verifies that a standard tool call (shell) exceeding the model formatting
+// Verifies that a standard tool call (shell_command) exceeding the model formatting
 // limits is truncated before being sent back to the model.
 #[tokio::test(flavor = "multi_thread", worker_threads = 2)]
 async fn tool_call_output_exceeds_limit_truncated_chars_limit() -> Result<()> {
@@ -185,34 +182,28 @@ async fn tool_call_output_exceeds_limit_truncated_chars_limit() -> Result<()> {

    let server = start_mock_server().await;

-    // Use a model that exposes the generic shell tool.
+    // Use a model that exposes the shell_command tool.
    let mut builder = test_codex().with_model("gpt-5.1");

    let fixture = builder.build(&server).await?;

    let call_id = "shell-too-large";
-    let args = if cfg!(windows) {
-        serde_json::json!({
-            "command": [
-                "powershell",
-                "-Command",
-                "for ($i=1; $i -le 100000; $i++) { Write-Output $i }"
-            ],
-            "timeout_ms": 5_000,
-        })
+    let command = if cfg!(windows) {
+        "for ($i=1; $i -le 100000; $i++) { Write-Output $i }"
    } else {
-        serde_json::json!({
-            "command": ["/bin/sh", "-c", "seq 1 100000"],
-            "timeout_ms": 5_000,
-        })
+        "seq 1 100000"
    };
+    let args = serde_json::json!({
+        "command": command,
+        "timeout_ms": 5_000,
+    });

    // First response: model tells us to run the tool; second: complete the turn.
    mount_sse_once(
        &server,
        sse(vec![
            responses::ev_response_created("resp-1"),
-            responses::ev_function_call(call_id, "shell", &serde_json::to_string(&args)?),
+            responses::ev_function_call(call_id, "shell_command", &serde_json::to_string(&args)?),
            responses::ev_completed("resp-1"),
        ]),
    )
@@ -250,14 +241,14 @@ async fn tool_call_output_exceeds_limit_truncated_chars_limit() -> Result<()> {

    let len = output.len();
    assert!(
-        (9_900..=10_000).contains(&len),
+        (9_900..=10_100).contains(&len),
        "expected ~10k chars after truncation, got {len}"
    );

    Ok(())
 }

-// Verifies that a standard tool call (shell) exceeding the model formatting
+// Verifies that a standard tool call (shell_command) exceeding the model formatting
 // limits is truncated before being sent back to the model.
 #[tokio::test(flavor = "multi_thread", worker_threads = 2)]
 async fn tool_call_output_exceeds_limit_truncated_for_model() -> Result<()> {
@@ -265,7 +256,7 @@ async fn tool_call_output_exceeds_limit_truncated_for_model() -> Result<()> {

    let server = start_mock_server().await;

-    // Use a model that exposes the generic shell tool.
+    // Use a model that exposes the shell_command tool.
    let mut builder = test_codex().with_config(|config| {
        config.model = "gpt-5.1-codex".to_string();
        config.model_family =
@@ -274,28 +265,22 @@ async fn tool_call_output_exceeds_limit_truncated_for_model() -> Result<()> {
    let fixture = builder.build(&server).await?;

    let call_id = "shell-too-large";
-    let args = if cfg!(windows) {
-        serde_json::json!({
-            "command": [
-                "powershell",
-                "-Command",
-                "for ($i=1; $i -le 100000; $i++) { Write-Output $i }"
-            ],
-            "timeout_ms": 5_000,
-        })
+    let command = if cfg!(windows) {
+        "for ($i=1; $i -le 100000; $i++) { Write-Output $i }"
    } else {
-        serde_json::json!({
-            "command": ["/bin/sh", "-c", "seq 1 100000"],
-            "timeout_ms": 5_000,
-        })
+        "seq 1 100000"
    };
+    let args = serde_json::json!({
+        "command": command,
+        "timeout_ms": 5_000,
+    });

    // First response: model tells us to run the tool; second: complete the turn.
    mount_sse_once(
        &server,
        sse(vec![
            responses::ev_response_created("resp-1"),
-            responses::ev_function_call(call_id, "shell", &serde_json::to_string(&args)?),
+            responses::ev_function_call(call_id, "shell_command", &serde_json::to_string(&args)?),
            responses::ev_completed("resp-1"),
        ]),
    )
@@ -345,7 +330,7 @@ $"#;
    Ok(())
 }

-// Ensures shell tool outputs that exceed the line limit are truncated only once.
+// Ensures shell_command outputs that exceed the line limit are truncated only once.
 #[tokio::test(flavor = "multi_thread", worker_threads = 2)]
 async fn tool_call_output_truncated_only_once() -> Result<()> {
    skip_if_no_network!(Ok(()));
@@ -359,27 +344,21 @@ async fn tool_call_output_truncated_only_once() -> Result<()> {
    });
    let fixture = builder.build(&server).await?;
    let call_id = "shell-single-truncation";
-    let args = if cfg!(windows) {
-        serde_json::json!({
-            "command": [
-                "powershell",
-                "-Command",
-                "for ($i=1; $i -le 10000; $i++) { Write-Output $i }"
-            ],
-            "timeout_ms": 5_000,
-        })
+    let command = if cfg!(windows) {
+        "for ($i=1; $i -le 10000; $i++) { Write-Output $i }"
    } else {
-        serde_json::json!({
-            "command": ["/bin/sh", "-c", "seq 1 10000"],
-            "timeout_ms": 5_000,
-        })
+        "seq 1 10000"
    };
+    let args = serde_json::json!({
+        "command": command,
+        "timeout_ms": 5_000,
+    });

    mount_sse_once(
        &server,
        sse(vec![
            responses::ev_response_created("resp-1"),
-            responses::ev_function_call(call_id, "shell", &serde_json::to_string(&args)?),
+            responses::ev_function_call(call_id, "shell_command", &serde_json::to_string(&args)?),
            responses::ev_completed("resp-1"),
        ]),
    )
@@ -619,7 +598,7 @@ async fn token_policy_marker_reports_tokens() -> Result<()> {

    let call_id = "shell-token-marker";
    let args = json!({
-        "command": ["/bin/sh", "-c", "seq 1 150"],
+        "command": "seq 1 150",
        "timeout_ms": 5_000,
    });

@@ -627,7 +606,7 @@ async fn token_policy_marker_reports_tokens() -> Result<()> {
        &server,
        sse(vec![
            ev_response_created("resp-1"),
-            ev_function_call(call_id, "shell", &serde_json::to_string(&args)?),
+            ev_function_call(call_id, "shell_command", &serde_json::to_string(&args)?),
            ev_completed("resp-1"),
        ]),
    )
@@ -650,7 +629,7 @@ async fn token_policy_marker_reports_tokens() -> Result<()> {
        .function_call_output_text(call_id)
        .context("shell output present")?;

-    let pattern = r#"(?s)^\{"output":"Total output lines: 150\\n\\n1\\n2\\n3\\n4\\n5\\n.*?…\d+ tokens truncated…7\\n138\\n139\\n140\\n141\\n142\\n143\\n144\\n145\\n146\\n147\\n148\\n149\\n150\\n","metadata":\{"exit_code":0,"duration_seconds":0\.0\}\}$"#;
+    let pattern = r"(?s)^Exit code: 0\nWall time: [0-9]+(?:\.[0-9]+)? seconds\nTotal output lines: 150\nOutput:\n1\n2\n3\n4\n5\n6\n7\n8\n9\n10\n11\n12\n13\n14\n15\n16\n17\n18\n19.*tokens truncated.*129\n130\n131\n132\n133\n134\n135\n136\n137\n138\n139\n140\n141\n142\n143\n144\n145\n146\n147\n148\n149\n150\n$";

    assert_regex_match(pattern, &output);

@@ -672,7 +651,7 @@ async fn byte_policy_marker_reports_bytes() -> Result<()> {

    let call_id = "shell-byte-marker";
    let args = json!({
-        "command": ["/bin/sh", "-c", "seq 1 150"],
+        "command": "seq 1 150",
        "timeout_ms": 5_000,
    });

@@ -680,7 +659,7 @@ async fn byte_policy_marker_reports_bytes() -> Result<()> {
        &server,
        sse(vec![
            ev_response_created("resp-1"),
-            ev_function_call(call_id, "shell", &serde_json::to_string(&args)?),
+            ev_function_call(call_id, "shell_command", &serde_json::to_string(&args)?),
            ev_completed("resp-1"),
        ]),
    )
@@ -703,16 +682,16 @@ async fn byte_policy_marker_reports_bytes() -> Result<()> {
        .function_call_output_text(call_id)
        .context("shell output present")?;

-    let pattern = r#"(?s)^\{"output":"Total output lines: 150\\n\\n1\\n2\\n3\\n4\\n5.*?…\d+ chars truncated…7\\n138\\n139\\n140\\n141\\n142\\n143\\n144\\n145\\n146\\n147\\n148\\n149\\n150\\n","metadata":\{"exit_code":0,"duration_seconds":0\.0\}\}$"#;
+    let pattern = r"(?s)^Exit code: 0\nWall time: [0-9]+(?:\.[0-9]+)? seconds\nTotal output lines: 150\nOutput:\n1\n2\n3\n4\n5\n6\n7\n8\n9\n10\n11\n12\n13\n14\n15\n16\n17\n18\n19.*chars truncated.*129\n130\n131\n132\n133\n134\n135\n136\n137\n138\n139\n140\n141\n142\n143\n144\n145\n146\n147\n148\n149\n150\n$";

    assert_regex_match(pattern, &output);

    Ok(())
 }

-// Shell tool output should remain intact when the config opts into a large token budget.
+// shell_command output should remain intact when the config opts into a large token budget.
 #[tokio::test(flavor = "multi_thread", worker_threads = 2)]
-async fn shell_tool_output_not_truncated_with_custom_limit() -> Result<()> {
+async fn shell_command_output_not_truncated_with_custom_limit() -> Result<()> {
    skip_if_no_network!(Ok(()));

    let server = start_mock_server().await;
@@ -726,7 +705,7 @@ async fn shell_tool_output_not_truncated_with_custom_limit() -> Result<()> {

    let call_id = "shell-no-trunc";
    let args = json!({
-        "command": ["/bin/sh", "-c", "seq 1 1000"],
+        "command": "seq 1 1000",
        "timeout_ms": 5_000,
    });
    let expected_body: String = (1..=1000).map(|i| format!("{i}\n")).collect();
@@ -735,7 +714,7 @@ async fn shell_tool_output_not_truncated_with_custom_limit() -> Result<()> {
        &server,
        sse(vec![
            ev_response_created("resp-1"),
-            ev_function_call(call_id, "shell", &serde_json::to_string(&args)?),
+            ev_function_call(call_id, "shell_command", &serde_json::to_string(&args)?),
            ev_completed("resp-1"),
        ]),
    )