test: vendor zsh fork via DotSlash and stabilize zsh-fork tests (#12518)

## Why

The zsh integration tests were still brittle in two ways:

- they relied on `CODEX_TEST_ZSH_PATH` / environment-specific setup, so
they often did not exercise the patched zsh fork that `shell-tool-mcp`
ships
- once the tests consistently used the vendored zsh fork, they exposed
real Linux-specific zsh-fork issues in CI

In particular, the Linux failures were not just test noise:

- the zsh-fork launch path was dropping `ExecRequest.arg0`, so Linux
`codex-linux-sandbox` arg0 dispatch did not run and zsh wrapper-mode
could receive malformed arguments
- the
`turn_start_shell_zsh_fork_subcommand_decline_marks_parent_declined_v2`
test uses the zsh exec bridge (which talks to the parent over a Unix
socket), but Linux restricted sandbox seccomp denies `connect(2)`,
causing timeouts on `ubuntu-24.04` x86/arm

This PR makes the zsh tests consistently run against the intended
vendored zsh fork and fixes/hardens the zsh-fork path so the Linux CI
signal is meaningful.

## What Changed

- Added a single shared test-only DotSlash file for the patched zsh fork
at `codex-rs/exec-server/tests/suite/zsh` (analogous to the existing
`bash` test resource).
- Updated both app-server and exec-server zsh tests to use that shared
DotSlash zsh (no duplicate zsh DotSlash file, no `CODEX_TEST_ZSH_PATH`
dependency).
- Updated the app-server zsh-fork test helper to resolve the shared
DotSlash zsh and avoid silently falling back to host zsh.
- Kept the app-server zsh-fork tests configured via `config.toml`, using
a test wrapper path where needed to force `zsh -df` (and rewrite `-lc`
to `-c`) for the subcommand-decline test.
- Hardened the app-server subcommand-decline zsh-fork test for CI
variability:
  - tolerate an extra `/responses` POST with a no-op mock response
- tolerate non-target approval ordering while remaining strict on the
two `/usr/bin/true` approvals and decline behavior
- use `DangerFullAccess` on Linux for this one test because it validates
zsh approval flow, not Linux sandbox socket restrictions
- Fixed zsh-fork process launching on Linux by preserving `req.arg0` in
`ZshExecBridge::execute_shell_request(...)` so `codex-linux-sandbox`
arg0 dispatch continues to work.
- Moved `maybe_run_zsh_exec_wrapper_mode()` under
`arg0_dispatch_or_else(...)` in `app-server` and `cli` so wrapper-mode
handling coexists correctly with arg0-dispatched helper modes.
- Consolidated duplicated `dotslash -- fetch` resolution logic into
shared test support (`core/tests/common/lib.rs`).
- Updated `codex-rs/exec-server/tests/suite/accept_elicitation.rs` to
use DotSlash zsh and hardened the zsh elicitation test for Bazel/zsh
differences by:
  - resolving an absolute `git` path
  - running `git init --quiet .`
- asserting success / `.git` creation instead of relying on banner text

## Verification

- `cargo test -p codex-app-server turn_start_zsh_fork -- --nocapture`
- `cargo test -p codex-exec-server accept_elicitation -- --nocapture`
- `bazel test //codex-rs/exec-server:exec-server-all-test
--test_output=streamed --test_arg=--nocapture
--test_arg=accept_elicitation_for_prompt_rule_with_zsh`
- CI (`rust-ci`) on the final cleaned commit: `Tests — ubuntu-24.04 -
x86_64-unknown-linux-gnu` and `Tests — ubuntu-24.04-arm -
aarch64-unknown-linux-gnu` passed in [run
22291424358](https://github.com/openai/codex/actions/runs/22291424358)
This commit is contained in:
Michael Bolin
2026-02-22 19:39:56 -08:00
committed by GitHub
parent 7e569f1162
commit e8949f4507
9 changed files with 305 additions and 98 deletions

View File

@@ -2,18 +2,15 @@
//
// Running these tests with the patched zsh fork:
//
// The suite uses `CODEX_TEST_ZSH_PATH` when set. Example:
// CODEX_TEST_ZSH_PATH="$HOME/.local/codex-zsh-77045ef/bin/zsh" \
// cargo test -p codex-app-server turn_start_zsh_fork -- --nocapture
//
// For a single test:
// CODEX_TEST_ZSH_PATH="$HOME/.local/codex-zsh-77045ef/bin/zsh" \
// cargo test -p codex-app-server turn_start_shell_zsh_fork_subcommand_decline_marks_parent_declined_v2 -- --nocapture
// The suite resolves the shared test-only zsh DotSlash file at
// `exec-server/tests/suite/zsh` via DotSlash on first use, so `dotslash` and
// network access are required the first time the artifact is fetched.
use anyhow::Result;
use app_test_support::McpProcess;
use app_test_support::create_final_assistant_message_sse_response;
use app_test_support::create_mock_responses_server_sequence;
use app_test_support::create_mock_responses_server_sequence_unchecked;
use app_test_support::create_shell_command_sse_response;
use app_test_support::to_response;
use codex_app_server_protocol::CommandExecutionApprovalDecision;
@@ -38,6 +35,7 @@ use core_test_support::responses;
use core_test_support::skip_if_no_network;
use pretty_assertions::assert_eq;
use std::collections::BTreeMap;
use std::os::unix::fs::PermissionsExt;
use std::path::Path;
use tempfile::TempDir;
use tokio::time::timeout;
@@ -57,7 +55,7 @@ async fn turn_start_shell_zsh_fork_executes_command_v2() -> Result<()> {
let workspace = tmp.path().join("workspace");
std::fs::create_dir(&workspace)?;
let Some(zsh_path) = find_test_zsh_path() else {
let Some(zsh_path) = find_test_zsh_path()? else {
eprintln!("skipping zsh fork test: no zsh executable found");
return Ok(());
};
@@ -82,7 +80,7 @@ async fn turn_start_shell_zsh_fork_executes_command_v2() -> Result<()> {
&zsh_path,
)?;
let mut mcp = McpProcess::new(&codex_home).await?;
let mut mcp = create_zsh_test_mcp_process(&codex_home, &workspace).await?;
timeout(DEFAULT_READ_TIMEOUT, mcp.initialize()).await??;
let start_id = mcp
@@ -167,7 +165,7 @@ async fn turn_start_shell_zsh_fork_exec_approval_decline_v2() -> Result<()> {
let workspace = tmp.path().join("workspace");
std::fs::create_dir(&workspace)?;
let Some(zsh_path) = find_test_zsh_path() else {
let Some(zsh_path) = find_test_zsh_path()? else {
eprintln!("skipping zsh fork decline test: no zsh executable found");
return Ok(());
};
@@ -199,7 +197,7 @@ async fn turn_start_shell_zsh_fork_exec_approval_decline_v2() -> Result<()> {
&zsh_path,
)?;
let mut mcp = McpProcess::new(&codex_home).await?;
let mut mcp = create_zsh_test_mcp_process(&codex_home, &workspace).await?;
timeout(DEFAULT_READ_TIMEOUT, mcp.initialize()).await??;
let start_id = mcp
@@ -303,7 +301,7 @@ async fn turn_start_shell_zsh_fork_exec_approval_cancel_v2() -> Result<()> {
let workspace = tmp.path().join("workspace");
std::fs::create_dir(&workspace)?;
let Some(zsh_path) = find_test_zsh_path() else {
let Some(zsh_path) = find_test_zsh_path()? else {
eprintln!("skipping zsh fork cancel test: no zsh executable found");
return Ok(());
};
@@ -332,7 +330,7 @@ async fn turn_start_shell_zsh_fork_exec_approval_cancel_v2() -> Result<()> {
&zsh_path,
)?;
let mut mcp = McpProcess::new(&codex_home).await?;
let mut mcp = create_zsh_test_mcp_process(&codex_home, &workspace).await?;
timeout(DEFAULT_READ_TIMEOUT, mcp.initialize()).await??;
let start_id = mcp
@@ -434,7 +432,7 @@ async fn turn_start_shell_zsh_fork_subcommand_decline_marks_parent_declined_v2()
let workspace = tmp.path().join("workspace");
std::fs::create_dir(&workspace)?;
let Some(zsh_path) = find_test_zsh_path() else {
let Some(zsh_path) = find_test_zsh_path()? else {
eprintln!("skipping zsh fork subcommand decline test: no zsh executable found");
return Ok(());
};
@@ -446,6 +444,29 @@ async fn turn_start_shell_zsh_fork_subcommand_decline_marks_parent_declined_v2()
return Ok(());
}
eprintln!("using zsh path for zsh-fork test: {}", zsh_path.display());
let zsh_path_for_config = {
// App-server config accepts only a zsh path, not extra argv. Use a
// wrapper so this test can force `-df` and downgrade `-lc` to `-c`
// to avoid rc/login-shell startup noise.
let path = workspace.join("zsh-no-rc");
std::fs::write(
&path,
format!(
r#"#!/bin/sh
if [ "$1" = "-lc" ]; then
shift
set -- -c "$@"
fi
exec "{}" -df "$@"
"#,
zsh_path.display()
),
)?;
let mut permissions = std::fs::metadata(&path)?.permissions();
permissions.set_mode(0o755);
std::fs::set_permissions(&path, permissions)?;
path
};
let tool_call_arguments = serde_json::to_string(&serde_json::json!({
"command": "/usr/bin/true && /usr/bin/true",
@@ -461,7 +482,16 @@ async fn turn_start_shell_zsh_fork_subcommand_decline_marks_parent_declined_v2()
),
responses::ev_completed("resp-1"),
]);
let server = create_mock_responses_server_sequence(vec![response]).await;
let no_op_response = responses::sse(vec![
responses::ev_response_created("resp-2"),
responses::ev_completed("resp-2"),
]);
// Linux CI has occasionally issued a second `/responses` POST after the
// subcommand-decline flow. This test is about approval/decline behavior in
// the zsh fork, not exact model request count, so allow an extra request
// and return a harmless no-op response if it arrives.
let server =
create_mock_responses_server_sequence_unchecked(vec![response, no_op_response]).await;
create_config_toml(
&codex_home,
&server.uri(),
@@ -471,10 +501,10 @@ async fn turn_start_shell_zsh_fork_subcommand_decline_marks_parent_declined_v2()
(Feature::UnifiedExec, false),
(Feature::ShellSnapshot, false),
]),
&zsh_path,
&zsh_path_for_config,
)?;
let mut mcp = McpProcess::new(&codex_home).await?;
let mut mcp = create_zsh_test_mcp_process(&codex_home, &workspace).await?;
timeout(DEFAULT_READ_TIMEOUT, mcp.initialize()).await??;
let start_id = mcp
@@ -500,8 +530,16 @@ async fn turn_start_shell_zsh_fork_subcommand_decline_marks_parent_declined_v2()
}],
cwd: Some(workspace.clone()),
approval_policy: Some(codex_app_server_protocol::AskForApproval::OnRequest),
sandbox_policy: Some(codex_app_server_protocol::SandboxPolicy::ReadOnly {
access: codex_app_server_protocol::ReadOnlyAccess::FullAccess,
sandbox_policy: Some(if cfg!(target_os = "linux") {
// The zsh exec-bridge wrapper uses a Unix socket back to the parent
// process. Linux restricted sandbox seccomp denies connect(2), so use
// full access here; this test is validating zsh approval/decline
// behavior, not Linux sandboxing.
codex_app_server_protocol::SandboxPolicy::DangerFullAccess
} else {
codex_app_server_protocol::SandboxPolicy::ReadOnly {
access: codex_app_server_protocol::ReadOnlyAccess::FullAccess,
}
}),
model: Some("mock-model".to_string()),
effort: Some(codex_protocol::openai_models::ReasoningEffort::Medium),
@@ -517,10 +555,13 @@ async fn turn_start_shell_zsh_fork_subcommand_decline_marks_parent_declined_v2()
let TurnStartResponse { turn } = to_response::<TurnStartResponse>(turn_resp)?;
let mut approval_ids = Vec::new();
for decision in [
let mut saw_parent_approval = false;
let target_decisions = [
CommandExecutionApprovalDecision::Accept,
CommandExecutionApprovalDecision::Cancel,
] {
];
let mut target_decision_index = 0;
while target_decision_index < target_decisions.len() {
let server_req = timeout(
DEFAULT_READ_TIMEOUT,
mcp.read_stream_until_request_message(),
@@ -531,13 +572,40 @@ async fn turn_start_shell_zsh_fork_subcommand_decline_marks_parent_declined_v2()
panic!("expected CommandExecutionRequestApproval request");
};
assert_eq!(params.item_id, "call-zsh-fork-subcommand-decline");
approval_ids.push(
params
.approval_id
.clone()
.expect("approval_id must be present for zsh subcommand approvals"),
);
assert_eq!(params.thread_id, thread.id);
let is_target_subcommand = params.command.as_deref() == Some("/usr/bin/true");
if is_target_subcommand {
approval_ids.push(
params
.approval_id
.clone()
.expect("approval_id must be present for zsh subcommand approvals"),
);
}
let decision = if is_target_subcommand {
let decision = target_decisions[target_decision_index].clone();
target_decision_index += 1;
decision
} else {
let command = params
.command
.as_deref()
.expect("approval command should be present");
assert!(
!saw_parent_approval,
"unexpected extra non-target approval: {command}"
);
assert!(
command.contains("zsh-no-rc"),
"expected parent zsh wrapper approval, got: {command}"
);
assert!(
command.contains("/usr/bin/true && /usr/bin/true"),
"expected tool command in parent approval, got: {command}"
);
saw_parent_approval = true;
CommandExecutionApprovalDecision::Accept
};
mcp.send_response(
request_id,
serde_json::to_value(CommandExecutionRequestApprovalResponse { decision })?,
@@ -545,6 +613,8 @@ async fn turn_start_shell_zsh_fork_subcommand_decline_marks_parent_declined_v2()
.await?;
}
assert_eq!(approval_ids.len(), 2);
assert_ne!(approval_ids[0], approval_ids[1]);
let parent_completed_command_execution = timeout(DEFAULT_READ_TIMEOUT, async {
loop {
let completed_notif = mcp
@@ -563,32 +633,61 @@ async fn turn_start_shell_zsh_fork_subcommand_decline_marks_parent_declined_v2()
}
}
})
.await??;
.await;
let ThreadItem::CommandExecution {
id,
status,
aggregated_output,
..
} = parent_completed_command_execution
else {
unreachable!("loop ensures we break on parent command execution item");
};
assert_eq!(id, "call-zsh-fork-subcommand-decline");
assert_eq!(status, CommandExecutionStatus::Declined);
assert!(
aggregated_output.is_none()
|| aggregated_output == Some("exec command rejected by user".to_string())
);
assert_eq!(approval_ids.len(), 2);
assert_ne!(approval_ids[0], approval_ids[1]);
match parent_completed_command_execution {
Ok(Ok(parent_completed_command_execution)) => {
let ThreadItem::CommandExecution {
id,
status,
aggregated_output,
..
} = parent_completed_command_execution
else {
unreachable!("loop ensures we break on parent command execution item");
};
assert_eq!(id, "call-zsh-fork-subcommand-decline");
assert_eq!(status, CommandExecutionStatus::Declined);
assert!(
aggregated_output.is_none()
|| aggregated_output == Some("exec command rejected by user".to_string())
);
mcp.interrupt_turn_and_wait_for_aborted(thread.id, turn.id, DEFAULT_READ_TIMEOUT)
.await?;
mcp.interrupt_turn_and_wait_for_aborted(
thread.id.clone(),
turn.id.clone(),
DEFAULT_READ_TIMEOUT,
)
.await?;
}
Ok(Err(error)) => return Err(error),
Err(_) => {
// Some zsh builds abort the turn immediately after the rejected
// subcommand without emitting a parent `item/completed`.
let completed_notif = timeout(
DEFAULT_READ_TIMEOUT,
mcp.read_stream_until_notification_message("turn/completed"),
)
.await??;
let completed: TurnCompletedNotification = serde_json::from_value(
completed_notif
.params
.expect("turn/completed params must be present"),
)?;
assert_eq!(completed.thread_id, thread.id);
assert_eq!(completed.turn.id, turn.id);
assert_eq!(completed.turn.status, TurnStatus::Interrupted);
}
}
Ok(())
}
async fn create_zsh_test_mcp_process(codex_home: &Path, zdotdir: &Path) -> Result<McpProcess> {
let zdotdir = zdotdir.to_string_lossy().into_owned();
McpProcess::new_with_env(codex_home, &[("ZDOTDIR", Some(zdotdir.as_str()))]).await
}
fn create_config_toml(
codex_home: &Path,
server_uri: &str,
@@ -640,36 +739,24 @@ stream_max_retries = 0
)
}
fn find_test_zsh_path() -> Option<std::path::PathBuf> {
if let Some(path) = std::env::var_os("CODEX_TEST_ZSH_PATH") {
let path = std::path::PathBuf::from(path);
if path.is_file() {
return Some(path);
}
panic!(
"CODEX_TEST_ZSH_PATH is set but is not a file: {}",
path.display()
fn find_test_zsh_path() -> Result<Option<std::path::PathBuf>> {
let repo_root = codex_utils_cargo_bin::repo_root()?;
let dotslash_zsh = repo_root.join("codex-rs/exec-server/tests/suite/zsh");
if !dotslash_zsh.is_file() {
eprintln!(
"skipping zsh fork test: shared zsh DotSlash file not found at {}",
dotslash_zsh.display()
);
return Ok(None);
}
for candidate in ["/bin/zsh", "/usr/bin/zsh"] {
let path = Path::new(candidate);
if path.is_file() {
return Some(path.to_path_buf());
match core_test_support::fetch_dotslash_file(&dotslash_zsh, None) {
Ok(path) => return Ok(Some(path)),
Err(error) => {
eprintln!("failed to fetch vendored zsh via dotslash: {error:#}");
}
}
let shell = std::env::var_os("SHELL")?;
let shell_path = std::path::PathBuf::from(shell);
if shell_path
.file_name()
.is_some_and(|file_name| file_name == "zsh")
&& shell_path.is_file()
{
return Some(shell_path);
}
None
Ok(None)
}
fn supports_exec_wrapper_intercept(zsh_path: &Path) -> bool {