mirror of
https://github.com/openai/codex.git
synced 2026-04-28 10:21:06 +03:00
test: vendor zsh fork via DotSlash and stabilize zsh-fork tests (#12518)
## Why The zsh integration tests were still brittle in two ways: - they relied on `CODEX_TEST_ZSH_PATH` / environment-specific setup, so they often did not exercise the patched zsh fork that `shell-tool-mcp` ships - once the tests consistently used the vendored zsh fork, they exposed real Linux-specific zsh-fork issues in CI In particular, the Linux failures were not just test noise: - the zsh-fork launch path was dropping `ExecRequest.arg0`, so Linux `codex-linux-sandbox` arg0 dispatch did not run and zsh wrapper-mode could receive malformed arguments - the `turn_start_shell_zsh_fork_subcommand_decline_marks_parent_declined_v2` test uses the zsh exec bridge (which talks to the parent over a Unix socket), but Linux restricted sandbox seccomp denies `connect(2)`, causing timeouts on `ubuntu-24.04` x86/arm This PR makes the zsh tests consistently run against the intended vendored zsh fork and fixes/hardens the zsh-fork path so the Linux CI signal is meaningful. ## What Changed - Added a single shared test-only DotSlash file for the patched zsh fork at `codex-rs/exec-server/tests/suite/zsh` (analogous to the existing `bash` test resource). - Updated both app-server and exec-server zsh tests to use that shared DotSlash zsh (no duplicate zsh DotSlash file, no `CODEX_TEST_ZSH_PATH` dependency). - Updated the app-server zsh-fork test helper to resolve the shared DotSlash zsh and avoid silently falling back to host zsh. - Kept the app-server zsh-fork tests configured via `config.toml`, using a test wrapper path where needed to force `zsh -df` (and rewrite `-lc` to `-c`) for the subcommand-decline test. - Hardened the app-server subcommand-decline zsh-fork test for CI variability: - tolerate an extra `/responses` POST with a no-op mock response - tolerate non-target approval ordering while remaining strict on the two `/usr/bin/true` approvals and decline behavior - use `DangerFullAccess` on Linux for this one test because it validates zsh approval flow, not Linux sandbox socket restrictions - Fixed zsh-fork process launching on Linux by preserving `req.arg0` in `ZshExecBridge::execute_shell_request(...)` so `codex-linux-sandbox` arg0 dispatch continues to work. - Moved `maybe_run_zsh_exec_wrapper_mode()` under `arg0_dispatch_or_else(...)` in `app-server` and `cli` so wrapper-mode handling coexists correctly with arg0-dispatched helper modes. - Consolidated duplicated `dotslash -- fetch` resolution logic into shared test support (`core/tests/common/lib.rs`). - Updated `codex-rs/exec-server/tests/suite/accept_elicitation.rs` to use DotSlash zsh and hardened the zsh elicitation test for Bazel/zsh differences by: - resolving an absolute `git` path - running `git init --quiet .` - asserting success / `.git` creation instead of relying on banner text ## Verification - `cargo test -p codex-app-server turn_start_zsh_fork -- --nocapture` - `cargo test -p codex-exec-server accept_elicitation -- --nocapture` - `bazel test //codex-rs/exec-server:exec-server-all-test --test_output=streamed --test_arg=--nocapture --test_arg=accept_elicitation_for_prompt_rule_with_zsh` - CI (`rust-ci`) on the final cleaned commit: `Tests — ubuntu-24.04 - x86_64-unknown-linux-gnu` and `Tests — ubuntu-24.04-arm - aarch64-unknown-linux-gnu` passed in [run 22291424358](https://github.com/openai/codex/actions/runs/22291424358)
This commit is contained in:
@@ -2,18 +2,15 @@
|
||||
//
|
||||
// Running these tests with the patched zsh fork:
|
||||
//
|
||||
// The suite uses `CODEX_TEST_ZSH_PATH` when set. Example:
|
||||
// CODEX_TEST_ZSH_PATH="$HOME/.local/codex-zsh-77045ef/bin/zsh" \
|
||||
// cargo test -p codex-app-server turn_start_zsh_fork -- --nocapture
|
||||
//
|
||||
// For a single test:
|
||||
// CODEX_TEST_ZSH_PATH="$HOME/.local/codex-zsh-77045ef/bin/zsh" \
|
||||
// cargo test -p codex-app-server turn_start_shell_zsh_fork_subcommand_decline_marks_parent_declined_v2 -- --nocapture
|
||||
// The suite resolves the shared test-only zsh DotSlash file at
|
||||
// `exec-server/tests/suite/zsh` via DotSlash on first use, so `dotslash` and
|
||||
// network access are required the first time the artifact is fetched.
|
||||
|
||||
use anyhow::Result;
|
||||
use app_test_support::McpProcess;
|
||||
use app_test_support::create_final_assistant_message_sse_response;
|
||||
use app_test_support::create_mock_responses_server_sequence;
|
||||
use app_test_support::create_mock_responses_server_sequence_unchecked;
|
||||
use app_test_support::create_shell_command_sse_response;
|
||||
use app_test_support::to_response;
|
||||
use codex_app_server_protocol::CommandExecutionApprovalDecision;
|
||||
@@ -38,6 +35,7 @@ use core_test_support::responses;
|
||||
use core_test_support::skip_if_no_network;
|
||||
use pretty_assertions::assert_eq;
|
||||
use std::collections::BTreeMap;
|
||||
use std::os::unix::fs::PermissionsExt;
|
||||
use std::path::Path;
|
||||
use tempfile::TempDir;
|
||||
use tokio::time::timeout;
|
||||
@@ -57,7 +55,7 @@ async fn turn_start_shell_zsh_fork_executes_command_v2() -> Result<()> {
|
||||
let workspace = tmp.path().join("workspace");
|
||||
std::fs::create_dir(&workspace)?;
|
||||
|
||||
let Some(zsh_path) = find_test_zsh_path() else {
|
||||
let Some(zsh_path) = find_test_zsh_path()? else {
|
||||
eprintln!("skipping zsh fork test: no zsh executable found");
|
||||
return Ok(());
|
||||
};
|
||||
@@ -82,7 +80,7 @@ async fn turn_start_shell_zsh_fork_executes_command_v2() -> Result<()> {
|
||||
&zsh_path,
|
||||
)?;
|
||||
|
||||
let mut mcp = McpProcess::new(&codex_home).await?;
|
||||
let mut mcp = create_zsh_test_mcp_process(&codex_home, &workspace).await?;
|
||||
timeout(DEFAULT_READ_TIMEOUT, mcp.initialize()).await??;
|
||||
|
||||
let start_id = mcp
|
||||
@@ -167,7 +165,7 @@ async fn turn_start_shell_zsh_fork_exec_approval_decline_v2() -> Result<()> {
|
||||
let workspace = tmp.path().join("workspace");
|
||||
std::fs::create_dir(&workspace)?;
|
||||
|
||||
let Some(zsh_path) = find_test_zsh_path() else {
|
||||
let Some(zsh_path) = find_test_zsh_path()? else {
|
||||
eprintln!("skipping zsh fork decline test: no zsh executable found");
|
||||
return Ok(());
|
||||
};
|
||||
@@ -199,7 +197,7 @@ async fn turn_start_shell_zsh_fork_exec_approval_decline_v2() -> Result<()> {
|
||||
&zsh_path,
|
||||
)?;
|
||||
|
||||
let mut mcp = McpProcess::new(&codex_home).await?;
|
||||
let mut mcp = create_zsh_test_mcp_process(&codex_home, &workspace).await?;
|
||||
timeout(DEFAULT_READ_TIMEOUT, mcp.initialize()).await??;
|
||||
|
||||
let start_id = mcp
|
||||
@@ -303,7 +301,7 @@ async fn turn_start_shell_zsh_fork_exec_approval_cancel_v2() -> Result<()> {
|
||||
let workspace = tmp.path().join("workspace");
|
||||
std::fs::create_dir(&workspace)?;
|
||||
|
||||
let Some(zsh_path) = find_test_zsh_path() else {
|
||||
let Some(zsh_path) = find_test_zsh_path()? else {
|
||||
eprintln!("skipping zsh fork cancel test: no zsh executable found");
|
||||
return Ok(());
|
||||
};
|
||||
@@ -332,7 +330,7 @@ async fn turn_start_shell_zsh_fork_exec_approval_cancel_v2() -> Result<()> {
|
||||
&zsh_path,
|
||||
)?;
|
||||
|
||||
let mut mcp = McpProcess::new(&codex_home).await?;
|
||||
let mut mcp = create_zsh_test_mcp_process(&codex_home, &workspace).await?;
|
||||
timeout(DEFAULT_READ_TIMEOUT, mcp.initialize()).await??;
|
||||
|
||||
let start_id = mcp
|
||||
@@ -434,7 +432,7 @@ async fn turn_start_shell_zsh_fork_subcommand_decline_marks_parent_declined_v2()
|
||||
let workspace = tmp.path().join("workspace");
|
||||
std::fs::create_dir(&workspace)?;
|
||||
|
||||
let Some(zsh_path) = find_test_zsh_path() else {
|
||||
let Some(zsh_path) = find_test_zsh_path()? else {
|
||||
eprintln!("skipping zsh fork subcommand decline test: no zsh executable found");
|
||||
return Ok(());
|
||||
};
|
||||
@@ -446,6 +444,29 @@ async fn turn_start_shell_zsh_fork_subcommand_decline_marks_parent_declined_v2()
|
||||
return Ok(());
|
||||
}
|
||||
eprintln!("using zsh path for zsh-fork test: {}", zsh_path.display());
|
||||
let zsh_path_for_config = {
|
||||
// App-server config accepts only a zsh path, not extra argv. Use a
|
||||
// wrapper so this test can force `-df` and downgrade `-lc` to `-c`
|
||||
// to avoid rc/login-shell startup noise.
|
||||
let path = workspace.join("zsh-no-rc");
|
||||
std::fs::write(
|
||||
&path,
|
||||
format!(
|
||||
r#"#!/bin/sh
|
||||
if [ "$1" = "-lc" ]; then
|
||||
shift
|
||||
set -- -c "$@"
|
||||
fi
|
||||
exec "{}" -df "$@"
|
||||
"#,
|
||||
zsh_path.display()
|
||||
),
|
||||
)?;
|
||||
let mut permissions = std::fs::metadata(&path)?.permissions();
|
||||
permissions.set_mode(0o755);
|
||||
std::fs::set_permissions(&path, permissions)?;
|
||||
path
|
||||
};
|
||||
|
||||
let tool_call_arguments = serde_json::to_string(&serde_json::json!({
|
||||
"command": "/usr/bin/true && /usr/bin/true",
|
||||
@@ -461,7 +482,16 @@ async fn turn_start_shell_zsh_fork_subcommand_decline_marks_parent_declined_v2()
|
||||
),
|
||||
responses::ev_completed("resp-1"),
|
||||
]);
|
||||
let server = create_mock_responses_server_sequence(vec![response]).await;
|
||||
let no_op_response = responses::sse(vec![
|
||||
responses::ev_response_created("resp-2"),
|
||||
responses::ev_completed("resp-2"),
|
||||
]);
|
||||
// Linux CI has occasionally issued a second `/responses` POST after the
|
||||
// subcommand-decline flow. This test is about approval/decline behavior in
|
||||
// the zsh fork, not exact model request count, so allow an extra request
|
||||
// and return a harmless no-op response if it arrives.
|
||||
let server =
|
||||
create_mock_responses_server_sequence_unchecked(vec![response, no_op_response]).await;
|
||||
create_config_toml(
|
||||
&codex_home,
|
||||
&server.uri(),
|
||||
@@ -471,10 +501,10 @@ async fn turn_start_shell_zsh_fork_subcommand_decline_marks_parent_declined_v2()
|
||||
(Feature::UnifiedExec, false),
|
||||
(Feature::ShellSnapshot, false),
|
||||
]),
|
||||
&zsh_path,
|
||||
&zsh_path_for_config,
|
||||
)?;
|
||||
|
||||
let mut mcp = McpProcess::new(&codex_home).await?;
|
||||
let mut mcp = create_zsh_test_mcp_process(&codex_home, &workspace).await?;
|
||||
timeout(DEFAULT_READ_TIMEOUT, mcp.initialize()).await??;
|
||||
|
||||
let start_id = mcp
|
||||
@@ -500,8 +530,16 @@ async fn turn_start_shell_zsh_fork_subcommand_decline_marks_parent_declined_v2()
|
||||
}],
|
||||
cwd: Some(workspace.clone()),
|
||||
approval_policy: Some(codex_app_server_protocol::AskForApproval::OnRequest),
|
||||
sandbox_policy: Some(codex_app_server_protocol::SandboxPolicy::ReadOnly {
|
||||
access: codex_app_server_protocol::ReadOnlyAccess::FullAccess,
|
||||
sandbox_policy: Some(if cfg!(target_os = "linux") {
|
||||
// The zsh exec-bridge wrapper uses a Unix socket back to the parent
|
||||
// process. Linux restricted sandbox seccomp denies connect(2), so use
|
||||
// full access here; this test is validating zsh approval/decline
|
||||
// behavior, not Linux sandboxing.
|
||||
codex_app_server_protocol::SandboxPolicy::DangerFullAccess
|
||||
} else {
|
||||
codex_app_server_protocol::SandboxPolicy::ReadOnly {
|
||||
access: codex_app_server_protocol::ReadOnlyAccess::FullAccess,
|
||||
}
|
||||
}),
|
||||
model: Some("mock-model".to_string()),
|
||||
effort: Some(codex_protocol::openai_models::ReasoningEffort::Medium),
|
||||
@@ -517,10 +555,13 @@ async fn turn_start_shell_zsh_fork_subcommand_decline_marks_parent_declined_v2()
|
||||
let TurnStartResponse { turn } = to_response::<TurnStartResponse>(turn_resp)?;
|
||||
|
||||
let mut approval_ids = Vec::new();
|
||||
for decision in [
|
||||
let mut saw_parent_approval = false;
|
||||
let target_decisions = [
|
||||
CommandExecutionApprovalDecision::Accept,
|
||||
CommandExecutionApprovalDecision::Cancel,
|
||||
] {
|
||||
];
|
||||
let mut target_decision_index = 0;
|
||||
while target_decision_index < target_decisions.len() {
|
||||
let server_req = timeout(
|
||||
DEFAULT_READ_TIMEOUT,
|
||||
mcp.read_stream_until_request_message(),
|
||||
@@ -531,13 +572,40 @@ async fn turn_start_shell_zsh_fork_subcommand_decline_marks_parent_declined_v2()
|
||||
panic!("expected CommandExecutionRequestApproval request");
|
||||
};
|
||||
assert_eq!(params.item_id, "call-zsh-fork-subcommand-decline");
|
||||
approval_ids.push(
|
||||
params
|
||||
.approval_id
|
||||
.clone()
|
||||
.expect("approval_id must be present for zsh subcommand approvals"),
|
||||
);
|
||||
assert_eq!(params.thread_id, thread.id);
|
||||
let is_target_subcommand = params.command.as_deref() == Some("/usr/bin/true");
|
||||
if is_target_subcommand {
|
||||
approval_ids.push(
|
||||
params
|
||||
.approval_id
|
||||
.clone()
|
||||
.expect("approval_id must be present for zsh subcommand approvals"),
|
||||
);
|
||||
}
|
||||
let decision = if is_target_subcommand {
|
||||
let decision = target_decisions[target_decision_index].clone();
|
||||
target_decision_index += 1;
|
||||
decision
|
||||
} else {
|
||||
let command = params
|
||||
.command
|
||||
.as_deref()
|
||||
.expect("approval command should be present");
|
||||
assert!(
|
||||
!saw_parent_approval,
|
||||
"unexpected extra non-target approval: {command}"
|
||||
);
|
||||
assert!(
|
||||
command.contains("zsh-no-rc"),
|
||||
"expected parent zsh wrapper approval, got: {command}"
|
||||
);
|
||||
assert!(
|
||||
command.contains("/usr/bin/true && /usr/bin/true"),
|
||||
"expected tool command in parent approval, got: {command}"
|
||||
);
|
||||
saw_parent_approval = true;
|
||||
CommandExecutionApprovalDecision::Accept
|
||||
};
|
||||
mcp.send_response(
|
||||
request_id,
|
||||
serde_json::to_value(CommandExecutionRequestApprovalResponse { decision })?,
|
||||
@@ -545,6 +613,8 @@ async fn turn_start_shell_zsh_fork_subcommand_decline_marks_parent_declined_v2()
|
||||
.await?;
|
||||
}
|
||||
|
||||
assert_eq!(approval_ids.len(), 2);
|
||||
assert_ne!(approval_ids[0], approval_ids[1]);
|
||||
let parent_completed_command_execution = timeout(DEFAULT_READ_TIMEOUT, async {
|
||||
loop {
|
||||
let completed_notif = mcp
|
||||
@@ -563,32 +633,61 @@ async fn turn_start_shell_zsh_fork_subcommand_decline_marks_parent_declined_v2()
|
||||
}
|
||||
}
|
||||
})
|
||||
.await??;
|
||||
.await;
|
||||
|
||||
let ThreadItem::CommandExecution {
|
||||
id,
|
||||
status,
|
||||
aggregated_output,
|
||||
..
|
||||
} = parent_completed_command_execution
|
||||
else {
|
||||
unreachable!("loop ensures we break on parent command execution item");
|
||||
};
|
||||
assert_eq!(id, "call-zsh-fork-subcommand-decline");
|
||||
assert_eq!(status, CommandExecutionStatus::Declined);
|
||||
assert!(
|
||||
aggregated_output.is_none()
|
||||
|| aggregated_output == Some("exec command rejected by user".to_string())
|
||||
);
|
||||
assert_eq!(approval_ids.len(), 2);
|
||||
assert_ne!(approval_ids[0], approval_ids[1]);
|
||||
match parent_completed_command_execution {
|
||||
Ok(Ok(parent_completed_command_execution)) => {
|
||||
let ThreadItem::CommandExecution {
|
||||
id,
|
||||
status,
|
||||
aggregated_output,
|
||||
..
|
||||
} = parent_completed_command_execution
|
||||
else {
|
||||
unreachable!("loop ensures we break on parent command execution item");
|
||||
};
|
||||
assert_eq!(id, "call-zsh-fork-subcommand-decline");
|
||||
assert_eq!(status, CommandExecutionStatus::Declined);
|
||||
assert!(
|
||||
aggregated_output.is_none()
|
||||
|| aggregated_output == Some("exec command rejected by user".to_string())
|
||||
);
|
||||
|
||||
mcp.interrupt_turn_and_wait_for_aborted(thread.id, turn.id, DEFAULT_READ_TIMEOUT)
|
||||
.await?;
|
||||
mcp.interrupt_turn_and_wait_for_aborted(
|
||||
thread.id.clone(),
|
||||
turn.id.clone(),
|
||||
DEFAULT_READ_TIMEOUT,
|
||||
)
|
||||
.await?;
|
||||
}
|
||||
Ok(Err(error)) => return Err(error),
|
||||
Err(_) => {
|
||||
// Some zsh builds abort the turn immediately after the rejected
|
||||
// subcommand without emitting a parent `item/completed`.
|
||||
let completed_notif = timeout(
|
||||
DEFAULT_READ_TIMEOUT,
|
||||
mcp.read_stream_until_notification_message("turn/completed"),
|
||||
)
|
||||
.await??;
|
||||
let completed: TurnCompletedNotification = serde_json::from_value(
|
||||
completed_notif
|
||||
.params
|
||||
.expect("turn/completed params must be present"),
|
||||
)?;
|
||||
assert_eq!(completed.thread_id, thread.id);
|
||||
assert_eq!(completed.turn.id, turn.id);
|
||||
assert_eq!(completed.turn.status, TurnStatus::Interrupted);
|
||||
}
|
||||
}
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
async fn create_zsh_test_mcp_process(codex_home: &Path, zdotdir: &Path) -> Result<McpProcess> {
|
||||
let zdotdir = zdotdir.to_string_lossy().into_owned();
|
||||
McpProcess::new_with_env(codex_home, &[("ZDOTDIR", Some(zdotdir.as_str()))]).await
|
||||
}
|
||||
|
||||
fn create_config_toml(
|
||||
codex_home: &Path,
|
||||
server_uri: &str,
|
||||
@@ -640,36 +739,24 @@ stream_max_retries = 0
|
||||
)
|
||||
}
|
||||
|
||||
fn find_test_zsh_path() -> Option<std::path::PathBuf> {
|
||||
if let Some(path) = std::env::var_os("CODEX_TEST_ZSH_PATH") {
|
||||
let path = std::path::PathBuf::from(path);
|
||||
if path.is_file() {
|
||||
return Some(path);
|
||||
}
|
||||
panic!(
|
||||
"CODEX_TEST_ZSH_PATH is set but is not a file: {}",
|
||||
path.display()
|
||||
fn find_test_zsh_path() -> Result<Option<std::path::PathBuf>> {
|
||||
let repo_root = codex_utils_cargo_bin::repo_root()?;
|
||||
let dotslash_zsh = repo_root.join("codex-rs/exec-server/tests/suite/zsh");
|
||||
if !dotslash_zsh.is_file() {
|
||||
eprintln!(
|
||||
"skipping zsh fork test: shared zsh DotSlash file not found at {}",
|
||||
dotslash_zsh.display()
|
||||
);
|
||||
return Ok(None);
|
||||
}
|
||||
|
||||
for candidate in ["/bin/zsh", "/usr/bin/zsh"] {
|
||||
let path = Path::new(candidate);
|
||||
if path.is_file() {
|
||||
return Some(path.to_path_buf());
|
||||
match core_test_support::fetch_dotslash_file(&dotslash_zsh, None) {
|
||||
Ok(path) => return Ok(Some(path)),
|
||||
Err(error) => {
|
||||
eprintln!("failed to fetch vendored zsh via dotslash: {error:#}");
|
||||
}
|
||||
}
|
||||
|
||||
let shell = std::env::var_os("SHELL")?;
|
||||
let shell_path = std::path::PathBuf::from(shell);
|
||||
if shell_path
|
||||
.file_name()
|
||||
.is_some_and(|file_name| file_name == "zsh")
|
||||
&& shell_path.is_file()
|
||||
{
|
||||
return Some(shell_path);
|
||||
}
|
||||
|
||||
None
|
||||
Ok(None)
|
||||
}
|
||||
|
||||
fn supports_exec_wrapper_intercept(zsh_path: &Path) -> bool {
|
||||
|
||||
Reference in New Issue
Block a user