test: vendor zsh fork via DotSlash and stabilize zsh-fork tests (#12518)

## Why

The zsh integration tests were still brittle in two ways:

- they relied on `CODEX_TEST_ZSH_PATH` / environment-specific setup, so they often did not exercise the patched zsh fork that `shell-tool-mcp` ships
- once the tests consistently used the vendored zsh fork, they exposed real Linux-specific zsh-fork issues in CI

In particular, the Linux failures were not just test noise:

- the zsh-fork launch path was dropping `ExecRequest.arg0`, so Linux `codex-linux-sandbox` arg0 dispatch did not run and zsh wrapper-mode could receive malformed arguments
- the `turn_start_shell_zsh_fork_subcommand_decline_marks_parent_declined_v2` test uses the zsh exec bridge (which talks to the parent over a Unix socket), but Linux restricted sandbox seccomp denies `connect(2)`, causing timeouts on `ubuntu-24.04` x86/arm

This PR makes the zsh tests consistently run against the intended vendored zsh fork and fixes/hardens the zsh-fork path so the Linux CI signal is meaningful.

## What Changed

- Added a single shared test-only DotSlash file for the patched zsh fork at `codex-rs/exec-server/tests/suite/zsh` (analogous to the existing `bash` test resource).
- Updated both app-server and exec-server zsh tests to use that shared DotSlash zsh (no duplicate zsh DotSlash file, no `CODEX_TEST_ZSH_PATH` dependency).
- Updated the app-server zsh-fork test helper to resolve the shared DotSlash zsh and avoid silently falling back to host zsh.
- Kept the app-server zsh-fork tests configured via `config.toml`, using a test wrapper path where needed to force `zsh -df` (and rewrite `-lc` to `-c`) for the subcommand-decline test.
- Hardened the app-server subcommand-decline zsh-fork test for CI variability:
  - tolerate an extra `/responses` POST with a no-op mock response
  - tolerate non-target approval ordering while remaining strict on the two `/usr/bin/true` approvals and decline behavior
  - use `DangerFullAccess` on Linux for this one test because it validates zsh approval flow, not Linux sandbox socket restrictions
- Fixed zsh-fork process launching on Linux by preserving `req.arg0` in `ZshExecBridge::execute_shell_request(...)` so `codex-linux-sandbox` arg0 dispatch continues to work.
- Moved `maybe_run_zsh_exec_wrapper_mode()` under `arg0_dispatch_or_else(...)` in `app-server` and `cli` so wrapper-mode handling coexists correctly with arg0-dispatched helper modes.
- Consolidated duplicated `dotslash -- fetch` resolution logic into shared test support (`core/tests/common/lib.rs`).
- Updated `codex-rs/exec-server/tests/suite/accept_elicitation.rs` to use DotSlash zsh and hardened the zsh elicitation test for Bazel/zsh differences by:
  - resolving an absolute `git` path
  - running `git init --quiet .`
  - asserting success / `.git` creation instead of relying on banner text

## Verification

- `cargo test -p codex-app-server turn_start_zsh_fork -- --nocapture`
- `cargo test -p codex-exec-server accept_elicitation -- --nocapture`
- `bazel test //codex-rs/exec-server:exec-server-all-test --test_output=streamed --test_arg=--nocapture --test_arg=accept_elicitation_for_prompt_rule_with_zsh`
- CI (`rust-ci`) on the final cleaned commit: `Tests — ubuntu-24.04 - x86_64-unknown-linux-gnu` and `Tests — ubuntu-24.04-arm - aarch64-unknown-linux-gnu` passed in [run 22291424358](https://github.com/openai/codex/actions/runs/22291424358)
2026-04-28 10:21:06 +03:00 · 2026-02-22 19:39:56 -08:00
parent 7e569f1162
commit e8949f4507
9 changed files with 305 additions and 98 deletions
--- a/codex-rs/app-server/tests/suite/v2/turn_start_zsh_fork.rs
+++ b/codex-rs/app-server/tests/suite/v2/turn_start_zsh_fork.rs
@@ -2,18 +2,15 @@
 //
 // Running these tests with the patched zsh fork:
 //
-// The suite uses `CODEX_TEST_ZSH_PATH` when set. Example:
-//   CODEX_TEST_ZSH_PATH="$HOME/.local/codex-zsh-77045ef/bin/zsh" \
-//   cargo test -p codex-app-server turn_start_zsh_fork -- --nocapture
-//
-// For a single test:
-//   CODEX_TEST_ZSH_PATH="$HOME/.local/codex-zsh-77045ef/bin/zsh" \
-//   cargo test -p codex-app-server turn_start_shell_zsh_fork_subcommand_decline_marks_parent_declined_v2 -- --nocapture
+// The suite resolves the shared test-only zsh DotSlash file at
+// `exec-server/tests/suite/zsh` via DotSlash on first use, so `dotslash` and
+// network access are required the first time the artifact is fetched.

 use anyhow::Result;
 use app_test_support::McpProcess;
 use app_test_support::create_final_assistant_message_sse_response;
 use app_test_support::create_mock_responses_server_sequence;
+use app_test_support::create_mock_responses_server_sequence_unchecked;
 use app_test_support::create_shell_command_sse_response;
 use app_test_support::to_response;
 use codex_app_server_protocol::CommandExecutionApprovalDecision;
@@ -38,6 +35,7 @@ use core_test_support::responses;
 use core_test_support::skip_if_no_network;
 use pretty_assertions::assert_eq;
 use std::collections::BTreeMap;
+use std::os::unix::fs::PermissionsExt;
 use std::path::Path;
 use tempfile::TempDir;
 use tokio::time::timeout;
@@ -57,7 +55,7 @@ async fn turn_start_shell_zsh_fork_executes_command_v2() -> Result<()> {
    let workspace = tmp.path().join("workspace");
    std::fs::create_dir(&workspace)?;

-    let Some(zsh_path) = find_test_zsh_path() else {
+    let Some(zsh_path) = find_test_zsh_path()? else {
        eprintln!("skipping zsh fork test: no zsh executable found");
        return Ok(());
    };
@@ -82,7 +80,7 @@ async fn turn_start_shell_zsh_fork_executes_command_v2() -> Result<()> {
        &zsh_path,
    )?;

-    let mut mcp = McpProcess::new(&codex_home).await?;
+    let mut mcp = create_zsh_test_mcp_process(&codex_home, &workspace).await?;
    timeout(DEFAULT_READ_TIMEOUT, mcp.initialize()).await??;

    let start_id = mcp
@@ -167,7 +165,7 @@ async fn turn_start_shell_zsh_fork_exec_approval_decline_v2() -> Result<()> {
    let workspace = tmp.path().join("workspace");
    std::fs::create_dir(&workspace)?;

-    let Some(zsh_path) = find_test_zsh_path() else {
+    let Some(zsh_path) = find_test_zsh_path()? else {
        eprintln!("skipping zsh fork decline test: no zsh executable found");
        return Ok(());
    };
@@ -199,7 +197,7 @@ async fn turn_start_shell_zsh_fork_exec_approval_decline_v2() -> Result<()> {
        &zsh_path,
    )?;

-    let mut mcp = McpProcess::new(&codex_home).await?;
+    let mut mcp = create_zsh_test_mcp_process(&codex_home, &workspace).await?;
    timeout(DEFAULT_READ_TIMEOUT, mcp.initialize()).await??;

    let start_id = mcp
@@ -303,7 +301,7 @@ async fn turn_start_shell_zsh_fork_exec_approval_cancel_v2() -> Result<()> {
    let workspace = tmp.path().join("workspace");
    std::fs::create_dir(&workspace)?;

-    let Some(zsh_path) = find_test_zsh_path() else {
+    let Some(zsh_path) = find_test_zsh_path()? else {
        eprintln!("skipping zsh fork cancel test: no zsh executable found");
        return Ok(());
    };
@@ -332,7 +330,7 @@ async fn turn_start_shell_zsh_fork_exec_approval_cancel_v2() -> Result<()> {
        &zsh_path,
    )?;

-    let mut mcp = McpProcess::new(&codex_home).await?;
+    let mut mcp = create_zsh_test_mcp_process(&codex_home, &workspace).await?;
    timeout(DEFAULT_READ_TIMEOUT, mcp.initialize()).await??;

    let start_id = mcp
@@ -434,7 +432,7 @@ async fn turn_start_shell_zsh_fork_subcommand_decline_marks_parent_declined_v2()
    let workspace = tmp.path().join("workspace");
    std::fs::create_dir(&workspace)?;

-    let Some(zsh_path) = find_test_zsh_path() else {
+    let Some(zsh_path) = find_test_zsh_path()? else {
        eprintln!("skipping zsh fork subcommand decline test: no zsh executable found");
        return Ok(());
    };
@@ -446,6 +444,29 @@ async fn turn_start_shell_zsh_fork_subcommand_decline_marks_parent_declined_v2()
        return Ok(());
    }
    eprintln!("using zsh path for zsh-fork test: {}", zsh_path.display());
+    let zsh_path_for_config = {
+        // App-server config accepts only a zsh path, not extra argv. Use a
+        // wrapper so this test can force `-df` and downgrade `-lc` to `-c`
+        // to avoid rc/login-shell startup noise.
+        let path = workspace.join("zsh-no-rc");
+        std::fs::write(
+            &path,
+            format!(
+                r#"#!/bin/sh
+if [ "$1" = "-lc" ]; then
+  shift
+  set -- -c "$@"
+fi
+exec "{}" -df "$@"
+"#,
+                zsh_path.display()
+            ),
+        )?;
+        let mut permissions = std::fs::metadata(&path)?.permissions();
+        permissions.set_mode(0o755);
+        std::fs::set_permissions(&path, permissions)?;
+        path
+    };

    let tool_call_arguments = serde_json::to_string(&serde_json::json!({
        "command": "/usr/bin/true && /usr/bin/true",
@@ -461,7 +482,16 @@ async fn turn_start_shell_zsh_fork_subcommand_decline_marks_parent_declined_v2()
        ),
        responses::ev_completed("resp-1"),
    ]);
-    let server = create_mock_responses_server_sequence(vec![response]).await;
+    let no_op_response = responses::sse(vec![
+        responses::ev_response_created("resp-2"),
+        responses::ev_completed("resp-2"),
+    ]);
+    // Linux CI has occasionally issued a second `/responses` POST after the
+    // subcommand-decline flow. This test is about approval/decline behavior in
+    // the zsh fork, not exact model request count, so allow an extra request
+    // and return a harmless no-op response if it arrives.
+    let server =
+        create_mock_responses_server_sequence_unchecked(vec![response, no_op_response]).await;
    create_config_toml(
        &codex_home,
        &server.uri(),
@@ -471,10 +501,10 @@ async fn turn_start_shell_zsh_fork_subcommand_decline_marks_parent_declined_v2()
            (Feature::UnifiedExec, false),
            (Feature::ShellSnapshot, false),
        ]),
-        &zsh_path,
+        &zsh_path_for_config,
    )?;

-    let mut mcp = McpProcess::new(&codex_home).await?;
+    let mut mcp = create_zsh_test_mcp_process(&codex_home, &workspace).await?;
    timeout(DEFAULT_READ_TIMEOUT, mcp.initialize()).await??;

    let start_id = mcp
@@ -500,8 +530,16 @@ async fn turn_start_shell_zsh_fork_subcommand_decline_marks_parent_declined_v2()
            }],
            cwd: Some(workspace.clone()),
            approval_policy: Some(codex_app_server_protocol::AskForApproval::OnRequest),
-            sandbox_policy: Some(codex_app_server_protocol::SandboxPolicy::ReadOnly {
-                access: codex_app_server_protocol::ReadOnlyAccess::FullAccess,
+            sandbox_policy: Some(if cfg!(target_os = "linux") {
+                // The zsh exec-bridge wrapper uses a Unix socket back to the parent
+                // process. Linux restricted sandbox seccomp denies connect(2), so use
+                // full access here; this test is validating zsh approval/decline
+                // behavior, not Linux sandboxing.
+                codex_app_server_protocol::SandboxPolicy::DangerFullAccess
+            } else {
+                codex_app_server_protocol::SandboxPolicy::ReadOnly {
+                    access: codex_app_server_protocol::ReadOnlyAccess::FullAccess,
+                }
            }),
            model: Some("mock-model".to_string()),
            effort: Some(codex_protocol::openai_models::ReasoningEffort::Medium),
@@ -517,10 +555,13 @@ async fn turn_start_shell_zsh_fork_subcommand_decline_marks_parent_declined_v2()
    let TurnStartResponse { turn } = to_response::<TurnStartResponse>(turn_resp)?;

    let mut approval_ids = Vec::new();
-    for decision in [
+    let mut saw_parent_approval = false;
+    let target_decisions = [
        CommandExecutionApprovalDecision::Accept,
        CommandExecutionApprovalDecision::Cancel,
-    ] {
+    ];
+    let mut target_decision_index = 0;
+    while target_decision_index < target_decisions.len() {
        let server_req = timeout(
            DEFAULT_READ_TIMEOUT,
            mcp.read_stream_until_request_message(),
@@ -531,13 +572,40 @@ async fn turn_start_shell_zsh_fork_subcommand_decline_marks_parent_declined_v2()
            panic!("expected CommandExecutionRequestApproval request");
        };
        assert_eq!(params.item_id, "call-zsh-fork-subcommand-decline");
-        approval_ids.push(
-            params
-                .approval_id
-                .clone()
-                .expect("approval_id must be present for zsh subcommand approvals"),
-        );
        assert_eq!(params.thread_id, thread.id);
+        let is_target_subcommand = params.command.as_deref() == Some("/usr/bin/true");
+        if is_target_subcommand {
+            approval_ids.push(
+                params
+                    .approval_id
+                    .clone()
+                    .expect("approval_id must be present for zsh subcommand approvals"),
+            );
+        }
+        let decision = if is_target_subcommand {
+            let decision = target_decisions[target_decision_index].clone();
+            target_decision_index += 1;
+            decision
+        } else {
+            let command = params
+                .command
+                .as_deref()
+                .expect("approval command should be present");
+            assert!(
+                !saw_parent_approval,
+                "unexpected extra non-target approval: {command}"
+            );
+            assert!(
+                command.contains("zsh-no-rc"),
+                "expected parent zsh wrapper approval, got: {command}"
+            );
+            assert!(
+                command.contains("/usr/bin/true && /usr/bin/true"),
+                "expected tool command in parent approval, got: {command}"
+            );
+            saw_parent_approval = true;
+            CommandExecutionApprovalDecision::Accept
+        };
        mcp.send_response(
            request_id,
            serde_json::to_value(CommandExecutionRequestApprovalResponse { decision })?,
@@ -545,6 +613,8 @@ async fn turn_start_shell_zsh_fork_subcommand_decline_marks_parent_declined_v2()
        .await?;
    }

+    assert_eq!(approval_ids.len(), 2);
+    assert_ne!(approval_ids[0], approval_ids[1]);
    let parent_completed_command_execution = timeout(DEFAULT_READ_TIMEOUT, async {
        loop {
            let completed_notif = mcp
@@ -563,32 +633,61 @@ async fn turn_start_shell_zsh_fork_subcommand_decline_marks_parent_declined_v2()
            }
        }
    })
-    .await??;
+    .await;

-    let ThreadItem::CommandExecution {
-        id,
-        status,
-        aggregated_output,
-        ..
-    } = parent_completed_command_execution
-    else {
-        unreachable!("loop ensures we break on parent command execution item");
-    };
-    assert_eq!(id, "call-zsh-fork-subcommand-decline");
-    assert_eq!(status, CommandExecutionStatus::Declined);
-    assert!(
-        aggregated_output.is_none()
-            || aggregated_output == Some("exec command rejected by user".to_string())
-    );
-    assert_eq!(approval_ids.len(), 2);
-    assert_ne!(approval_ids[0], approval_ids[1]);
+    match parent_completed_command_execution {
+        Ok(Ok(parent_completed_command_execution)) => {
+            let ThreadItem::CommandExecution {
+                id,
+                status,
+                aggregated_output,
+                ..
+            } = parent_completed_command_execution
+            else {
+                unreachable!("loop ensures we break on parent command execution item");
+            };
+            assert_eq!(id, "call-zsh-fork-subcommand-decline");
+            assert_eq!(status, CommandExecutionStatus::Declined);
+            assert!(
+                aggregated_output.is_none()
+                    || aggregated_output == Some("exec command rejected by user".to_string())
+            );

-    mcp.interrupt_turn_and_wait_for_aborted(thread.id, turn.id, DEFAULT_READ_TIMEOUT)
-        .await?;
+            mcp.interrupt_turn_and_wait_for_aborted(
+                thread.id.clone(),
+                turn.id.clone(),
+                DEFAULT_READ_TIMEOUT,
+            )
+            .await?;
+        }
+        Ok(Err(error)) => return Err(error),
+        Err(_) => {
+            // Some zsh builds abort the turn immediately after the rejected
+            // subcommand without emitting a parent `item/completed`.
+            let completed_notif = timeout(
+                DEFAULT_READ_TIMEOUT,
+                mcp.read_stream_until_notification_message("turn/completed"),
+            )
+            .await??;
+            let completed: TurnCompletedNotification = serde_json::from_value(
+                completed_notif
+                    .params
+                    .expect("turn/completed params must be present"),
+            )?;
+            assert_eq!(completed.thread_id, thread.id);
+            assert_eq!(completed.turn.id, turn.id);
+            assert_eq!(completed.turn.status, TurnStatus::Interrupted);
+        }
+    }

    Ok(())
 }

+async fn create_zsh_test_mcp_process(codex_home: &Path, zdotdir: &Path) -> Result<McpProcess> {
+    let zdotdir = zdotdir.to_string_lossy().into_owned();
+    McpProcess::new_with_env(codex_home, &[("ZDOTDIR", Some(zdotdir.as_str()))]).await
+}
+
 fn create_config_toml(
    codex_home: &Path,
    server_uri: &str,
@@ -640,36 +739,24 @@ stream_max_retries = 0
    )
 }

-fn find_test_zsh_path() -> Option<std::path::PathBuf> {
-    if let Some(path) = std::env::var_os("CODEX_TEST_ZSH_PATH") {
-        let path = std::path::PathBuf::from(path);
-        if path.is_file() {
-            return Some(path);
-        }
-        panic!(
-            "CODEX_TEST_ZSH_PATH is set but is not a file: {}",
-            path.display()
+fn find_test_zsh_path() -> Result<Option<std::path::PathBuf>> {
+    let repo_root = codex_utils_cargo_bin::repo_root()?;
+    let dotslash_zsh = repo_root.join("codex-rs/exec-server/tests/suite/zsh");
+    if !dotslash_zsh.is_file() {
+        eprintln!(
+            "skipping zsh fork test: shared zsh DotSlash file not found at {}",
+            dotslash_zsh.display()
        );
+        return Ok(None);
    }
-
-    for candidate in ["/bin/zsh", "/usr/bin/zsh"] {
-        let path = Path::new(candidate);
-        if path.is_file() {
-            return Some(path.to_path_buf());
+    match core_test_support::fetch_dotslash_file(&dotslash_zsh, None) {
+        Ok(path) => return Ok(Some(path)),
+        Err(error) => {
+            eprintln!("failed to fetch vendored zsh via dotslash: {error:#}");
        }
    }

-    let shell = std::env::var_os("SHELL")?;
-    let shell_path = std::path::PathBuf::from(shell);
-    if shell_path
-        .file_name()
-        .is_some_and(|file_name| file_name == "zsh")
-        && shell_path.is_file()
-    {
-        return Some(shell_path);
-    }
-
-    None
+    Ok(None)
 }

 fn supports_exec_wrapper_intercept(zsh_path: &Path) -> bool {