From b77b71b4b67f538ccfe8516e1a71f1ae551be183 Mon Sep 17 00:00:00 2001 From: Michael Bolin Date: Wed, 4 Mar 2026 23:33:31 -0800 Subject: [PATCH] refactor: route zsh-fork through unified exec --- .../tests/suite/v2/turn_start_zsh_fork.rs | 236 +++++++++++++++--- .../tools/runtimes/shell/unix_escalation.rs | 65 ++--- codex-rs/core/src/tools/spec.rs | 8 +- codex-rs/core/tests/common/zsh_fork.rs | 23 ++ codex-rs/core/tests/suite/approvals.rs | 153 ++++++++++++ codex-rs/core/tests/suite/skill_approval.rs | 202 +++++++++++++++ 6 files changed, 621 insertions(+), 66 deletions(-) diff --git a/codex-rs/app-server/tests/suite/v2/turn_start_zsh_fork.rs b/codex-rs/app-server/tests/suite/v2/turn_start_zsh_fork.rs index bdbb283432..203194d616 100644 --- a/codex-rs/app-server/tests/suite/v2/turn_start_zsh_fork.rs +++ b/codex-rs/app-server/tests/suite/v2/turn_start_zsh_fork.rs @@ -11,7 +11,6 @@ use app_test_support::McpProcess; use app_test_support::create_final_assistant_message_sse_response; use app_test_support::create_mock_responses_server_sequence; use app_test_support::create_mock_responses_server_sequence_unchecked; -use app_test_support::create_shell_command_sse_response; use app_test_support::to_response; use codex_app_server_protocol::CommandAction; use codex_app_server_protocol::CommandExecutionApprovalDecision; @@ -35,9 +34,11 @@ use codex_core::features::Feature; use core_test_support::responses; use core_test_support::skip_if_no_network; use pretty_assertions::assert_eq; +use serde_json::json; use std::collections::BTreeMap; use std::path::Path; use tempfile::TempDir; +use tokio::time::sleep; use tokio::time::timeout; #[cfg(windows)] @@ -61,10 +62,8 @@ async fn turn_start_shell_zsh_fork_executes_command_v2() -> Result<()> { }; eprintln!("using zsh path for zsh-fork test: {}", zsh_path.display()); - let responses = vec![create_shell_command_sse_response( - vec!["echo".to_string(), "hi".to_string()], - None, - Some(5000), + let responses = vec![create_zsh_fork_exec_command_sse_response( + "echo hi", "call-zsh-fork", )?]; let server = create_mock_responses_server_sequence(responses).await; @@ -74,7 +73,7 @@ async fn turn_start_shell_zsh_fork_executes_command_v2() -> Result<()> { "never", &BTreeMap::from([ (Feature::ShellZshFork, true), - (Feature::UnifiedExec, false), + (Feature::UnifiedExec, true), (Feature::ShellSnapshot, false), ]), &zsh_path, @@ -172,14 +171,8 @@ async fn turn_start_shell_zsh_fork_exec_approval_decline_v2() -> Result<()> { eprintln!("using zsh path for zsh-fork test: {}", zsh_path.display()); let responses = vec![ - create_shell_command_sse_response( - vec![ - "python3".to_string(), - "-c".to_string(), - "print(42)".to_string(), - ], - None, - Some(5000), + create_zsh_fork_exec_command_sse_response( + "python3 -c 'print(42)'", "call-zsh-fork-decline", )?, create_final_assistant_message_sse_response("done")?, @@ -191,7 +184,7 @@ async fn turn_start_shell_zsh_fork_exec_approval_decline_v2() -> Result<()> { "untrusted", &BTreeMap::from([ (Feature::ShellZshFork, true), - (Feature::UnifiedExec, false), + (Feature::UnifiedExec, true), (Feature::ShellSnapshot, false), ]), &zsh_path, @@ -307,14 +300,8 @@ async fn turn_start_shell_zsh_fork_exec_approval_cancel_v2() -> Result<()> { }; eprintln!("using zsh path for zsh-fork test: {}", zsh_path.display()); - let responses = vec![create_shell_command_sse_response( - vec![ - "python3".to_string(), - "-c".to_string(), - "print(42)".to_string(), - ], - None, - Some(5000), + let responses = vec![create_zsh_fork_exec_command_sse_response( + "python3 -c 'print(42)'", "call-zsh-fork-cancel", )?]; let server = create_mock_responses_server_sequence(responses).await; @@ -324,7 +311,7 @@ async fn turn_start_shell_zsh_fork_exec_approval_cancel_v2() -> Result<()> { "untrusted", &BTreeMap::from([ (Feature::ShellZshFork, true), - (Feature::UnifiedExec, false), + (Feature::UnifiedExec, true), (Feature::ShellSnapshot, false), ]), &zsh_path, @@ -422,6 +409,181 @@ async fn turn_start_shell_zsh_fork_exec_approval_cancel_v2() -> Result<()> { Ok(()) } +#[tokio::test] +async fn turn_start_shell_zsh_fork_interrupt_kills_approved_subcommand_v2() -> Result<()> { + skip_if_no_network!(Ok(())); + + let tmp = TempDir::new()?; + let codex_home = tmp.path().join("codex_home"); + std::fs::create_dir(&codex_home)?; + let workspace = tmp.path().join("workspace"); + std::fs::create_dir(&workspace)?; + let pid_file = workspace.join("approved-subcommand.pid"); + let pid_file_display = pid_file.display().to_string(); + assert!( + !pid_file_display.contains('\''), + "test workspace path should not contain single quotes: {pid_file_display}" + ); + + let Some(zsh_path) = find_test_zsh_path()? else { + eprintln!("skipping zsh fork interrupt cleanup test: no zsh executable found"); + return Ok(()); + }; + if !supports_exec_wrapper_intercept(&zsh_path) { + eprintln!( + "skipping zsh fork interrupt cleanup test: zsh does not support EXEC_WRAPPER intercepts ({})", + zsh_path.display() + ); + return Ok(()); + } + let zsh_path_display = zsh_path.display().to_string(); + eprintln!("using zsh path for zsh-fork test: {zsh_path_display}"); + + let shell_command = + format!("/bin/sh -c 'echo $$ > \"{pid_file_display}\" && exec /bin/sleep 100'"); + let tool_call_arguments = serde_json::to_string(&json!({ + "cmd": shell_command, + "yield_time_ms": 30_000, + }))?; + let response = responses::sse(vec![ + responses::ev_response_created("resp-1"), + responses::ev_function_call( + "call-zsh-fork-interrupt-cleanup", + "exec_command", + &tool_call_arguments, + ), + responses::ev_completed("resp-1"), + ]); + let no_op_response = responses::sse(vec![ + responses::ev_response_created("resp-2"), + responses::ev_completed("resp-2"), + ]); + let server = + create_mock_responses_server_sequence_unchecked(vec![response, no_op_response]).await; + create_config_toml( + &codex_home, + &server.uri(), + "untrusted", + &BTreeMap::from([ + (Feature::ShellZshFork, true), + (Feature::UnifiedExec, true), + (Feature::ShellSnapshot, false), + ]), + &zsh_path, + )?; + + let mut mcp = create_zsh_test_mcp_process(&codex_home, &workspace).await?; + timeout(DEFAULT_READ_TIMEOUT, mcp.initialize()).await??; + + let start_id = mcp + .send_thread_start_request(ThreadStartParams { + model: Some("mock-model".to_string()), + cwd: Some(workspace.to_string_lossy().into_owned()), + ..Default::default() + }) + .await?; + let start_resp: JSONRPCResponse = timeout( + DEFAULT_READ_TIMEOUT, + mcp.read_stream_until_response_message(RequestId::Integer(start_id)), + ) + .await??; + let ThreadStartResponse { thread, .. } = to_response::(start_resp)?; + + let turn_id = mcp + .send_turn_start_request(TurnStartParams { + thread_id: thread.id.clone(), + input: vec![V2UserInput::Text { + text: "run the long-lived command".to_string(), + text_elements: Vec::new(), + }], + cwd: Some(workspace.clone()), + approval_policy: Some(codex_app_server_protocol::AskForApproval::UnlessTrusted), + sandbox_policy: Some(codex_app_server_protocol::SandboxPolicy::WorkspaceWrite { + writable_roots: vec![workspace.clone().try_into()?], + read_only_access: codex_app_server_protocol::ReadOnlyAccess::FullAccess, + network_access: false, + exclude_tmpdir_env_var: false, + exclude_slash_tmp: false, + }), + model: Some("mock-model".to_string()), + effort: Some(codex_protocol::openai_models::ReasoningEffort::Medium), + summary: Some(codex_protocol::config_types::ReasoningSummary::Auto), + ..Default::default() + }) + .await?; + let turn_resp: JSONRPCResponse = timeout( + DEFAULT_READ_TIMEOUT, + mcp.read_stream_until_response_message(RequestId::Integer(turn_id)), + ) + .await??; + let TurnStartResponse { turn } = to_response::(turn_resp)?; + + let mut saw_target_approval = false; + while !saw_target_approval { + let server_req = timeout( + DEFAULT_READ_TIMEOUT, + mcp.read_stream_until_request_message(), + ) + .await??; + let ServerRequest::CommandExecutionRequestApproval { request_id, params } = server_req + else { + panic!("expected CommandExecutionRequestApproval request"); + }; + let approval_command = params.command.clone().unwrap_or_default(); + saw_target_approval = approval_command.contains("/bin/sh") + && approval_command.contains(&pid_file_display) + && !approval_command.contains(&zsh_path_display); + mcp.send_response( + request_id, + serde_json::to_value(CommandExecutionRequestApprovalResponse { + decision: CommandExecutionApprovalDecision::Accept, + })?, + ) + .await?; + } + + let pid = timeout(DEFAULT_READ_TIMEOUT, async { + loop { + if let Ok(contents) = std::fs::read_to_string(&pid_file) { + return Ok::(contents.trim().parse()?); + } + sleep(std::time::Duration::from_millis(20)).await; + } + }) + .await??; + let still_running = std::process::Command::new("/bin/kill") + .args(["-0", &pid.to_string()]) + .status()? + .success(); + assert!( + still_running, + "expected approved intercepted subprocess pid {pid} to be running before interrupt" + ); + + mcp.interrupt_turn_and_wait_for_aborted( + thread.id.clone(), + turn.id.clone(), + DEFAULT_READ_TIMEOUT, + ) + .await?; + + timeout(DEFAULT_READ_TIMEOUT, async { + loop { + let still_running = std::process::Command::new("/bin/kill") + .args(["-0", &pid.to_string()]) + .status()? + .success(); + if !still_running { + return Ok::<(), anyhow::Error>(()); + } + sleep(std::time::Duration::from_millis(20)).await; + } + }) + .await??; + + Ok(()) +} + #[tokio::test] async fn turn_start_shell_zsh_fork_subcommand_decline_marks_parent_declined_v2() -> Result<()> { skip_if_no_network!(Ok(())); @@ -453,16 +615,15 @@ async fn turn_start_shell_zsh_fork_subcommand_decline_marks_parent_declined_v2() first_file.display(), second_file.display() ); - let tool_call_arguments = serde_json::to_string(&serde_json::json!({ - "command": shell_command, - "workdir": serde_json::Value::Null, - "timeout_ms": 5000 + let tool_call_arguments = serde_json::to_string(&json!({ + "cmd": shell_command, + "yield_time_ms": 5000, }))?; let response = responses::sse(vec![ responses::ev_response_created("resp-1"), responses::ev_function_call( "call-zsh-fork-subcommand-decline", - "shell_command", + "exec_command", &tool_call_arguments, ), responses::ev_completed("resp-1"), @@ -483,7 +644,7 @@ async fn turn_start_shell_zsh_fork_subcommand_decline_marks_parent_declined_v2() "untrusted", &BTreeMap::from([ (Feature::ShellZshFork, true), - (Feature::UnifiedExec, false), + (Feature::UnifiedExec, true), (Feature::ShellSnapshot, false), ]), &zsh_path, @@ -694,6 +855,21 @@ async fn create_zsh_test_mcp_process(codex_home: &Path, zdotdir: &Path) -> Resul McpProcess::new_with_env(codex_home, &[("ZDOTDIR", Some(zdotdir.as_str()))]).await } +fn create_zsh_fork_exec_command_sse_response( + command: &str, + call_id: &str, +) -> anyhow::Result { + let tool_call_arguments = serde_json::to_string(&json!({ + "cmd": command, + "yield_time_ms": 5000, + }))?; + Ok(responses::sse(vec![ + responses::ev_response_created("resp-1"), + responses::ev_function_call(call_id, "exec_command", &tool_call_arguments), + responses::ev_completed("resp-1"), + ])) +} + fn create_config_toml( codex_home: &Path, server_uri: &str, diff --git a/codex-rs/core/src/tools/runtimes/shell/unix_escalation.rs b/codex-rs/core/src/tools/runtimes/shell/unix_escalation.rs index 1be2654cec..4b3d9db8e6 100644 --- a/codex-rs/core/src/tools/runtimes/shell/unix_escalation.rs +++ b/codex-rs/core/src/tools/runtimes/shell/unix_escalation.rs @@ -49,7 +49,6 @@ use std::collections::HashMap; use std::path::PathBuf; use std::sync::Arc; use std::time::Duration; -use tokio::sync::RwLock; use tokio_util::sync::CancellationToken; use uuid::Uuid; @@ -107,9 +106,6 @@ pub(super) async fn try_run_zsh_fork( req.timeout_ms .unwrap_or(crate::exec::DEFAULT_EXEC_COMMAND_TIMEOUT_MS), ); - let exec_policy = Arc::new(RwLock::new( - ctx.session.services.exec_policy.current().as_ref().clone(), - )); let command_executor = CoreShellCommandExecutor { command, cwd: sandbox_cwd, @@ -154,7 +150,6 @@ pub(super) async fn try_run_zsh_fork( let stopwatch = Stopwatch::new(effective_timeout); let cancel_token = stopwatch.cancellation_token(); let escalation_policy = CoreShellActionProvider { - policy: Arc::clone(&exec_policy), session: Arc::clone(&ctx.session), turn: Arc::clone(&ctx.turn), call_id: ctx.call_id.clone(), @@ -214,20 +209,30 @@ pub(crate) async fn prepare_unified_exec_zsh_fork( return Ok(None); } - let exec_policy = Arc::new(RwLock::new( - ctx.session.services.exec_policy.current().as_ref().clone(), - )); + let ExecRequest { + command, + cwd, + env, + network, + expiration: _expiration, + sandbox, + windows_sandbox_level, + sandbox_permissions, + sandbox_policy, + justification, + arg0, + } = &exec_request; let command_executor = CoreShellCommandExecutor { - command: exec_request.command.clone(), - cwd: exec_request.cwd.clone(), - sandbox_policy: exec_request.sandbox_policy.clone(), - sandbox: exec_request.sandbox, - env: exec_request.env.clone(), - network: exec_request.network.clone(), - windows_sandbox_level: exec_request.windows_sandbox_level, - sandbox_permissions: exec_request.sandbox_permissions, - justification: exec_request.justification.clone(), - arg0: exec_request.arg0.clone(), + command: command.clone(), + cwd: cwd.clone(), + sandbox_policy: sandbox_policy.clone(), + sandbox: *sandbox, + env: env.clone(), + network: network.clone(), + windows_sandbox_level: *windows_sandbox_level, + sandbox_permissions: *sandbox_permissions, + justification: justification.clone(), + arg0: arg0.clone(), sandbox_policy_cwd: ctx.turn.cwd.clone(), macos_seatbelt_profile_extensions: ctx .turn @@ -249,7 +254,6 @@ pub(crate) async fn prepare_unified_exec_zsh_fork( ) })?; let escalation_policy = CoreShellActionProvider { - policy: Arc::clone(&exec_policy), session: Arc::clone(&ctx.session), turn: Arc::clone(&ctx.turn), call_id: ctx.call_id.clone(), @@ -277,7 +281,6 @@ pub(crate) async fn prepare_unified_exec_zsh_fork( } struct CoreShellActionProvider { - policy: Arc>, session: Arc, turn: Arc, call_id: String, @@ -597,18 +600,16 @@ impl EscalationPolicy for CoreShellActionProvider { .await; } - let evaluation = { - let policy = self.policy.read().await; - evaluate_intercepted_exec_policy( - &policy, - program, - argv, - self.approval_policy, - &self.sandbox_policy, - self.sandbox_permissions, - ENABLE_INTERCEPTED_EXEC_POLICY_SHELL_WRAPPER_PARSING, - ) - }; + let policy = self.session.services.exec_policy.current(); + let evaluation = evaluate_intercepted_exec_policy( + policy.as_ref(), + program, + argv, + self.approval_policy, + &self.sandbox_policy, + self.sandbox_permissions, + ENABLE_INTERCEPTED_EXEC_POLICY_SHELL_WRAPPER_PARSING, + ); // When true, means the Evaluation was due to *.rules, not the // fallback function. let decision_driven_by_policy = diff --git a/codex-rs/core/src/tools/spec.rs b/codex-rs/core/src/tools/spec.rs index 5739d3f028..3b60923736 100644 --- a/codex-rs/core/src/tools/spec.rs +++ b/codex-rs/core/src/tools/spec.rs @@ -115,8 +115,6 @@ impl ToolsConfig { let shell_type = if !features.enabled(Feature::ShellTool) { ConfigShellToolType::Disabled - } else if features.enabled(Feature::ShellZshFork) { - ConfigShellToolType::ShellCommand } else if features.enabled(Feature::UnifiedExec) { // If ConPTY not supported (for old Windows versions), fallback on ShellCommand. if codex_utils_pty::conpty_supported() { @@ -124,6 +122,8 @@ impl ToolsConfig { } else { ConfigShellToolType::ShellCommand } + } else if features.enabled(Feature::ShellZshFork) { + ConfigShellToolType::ShellCommand } else { model_info.shell_type }; @@ -2765,7 +2765,7 @@ mod tests { } #[test] - fn shell_zsh_fork_prefers_shell_command_over_unified_exec() { + fn shell_zsh_fork_uses_unified_exec_when_enabled() { let config = test_config(); let model_info = ModelsManager::construct_model_info_offline_for_tests("o3", &config); let mut features = Features::with_defaults(); @@ -2779,7 +2779,7 @@ mod tests { session_source: SessionSource::Cli, }); - assert_eq!(tools_config.shell_type, ConfigShellToolType::ShellCommand); + assert_eq!(tools_config.shell_type, ConfigShellToolType::UnifiedExec); assert_eq!( tools_config.shell_command_backend, ShellCommandBackendConfig::ZshFork diff --git a/codex-rs/core/tests/common/zsh_fork.rs b/codex-rs/core/tests/common/zsh_fork.rs index fbc46421d9..29db068ef5 100644 --- a/codex-rs/core/tests/common/zsh_fork.rs +++ b/codex-rs/core/tests/common/zsh_fork.rs @@ -91,6 +91,29 @@ where builder.build(server).await } +pub async fn build_unified_exec_zsh_fork_test( + server: &wiremock::MockServer, + runtime: ZshForkRuntime, + approval_policy: AskForApproval, + sandbox_policy: SandboxPolicy, + pre_build_hook: F, +) -> Result +where + F: FnOnce(&Path) + Send + 'static, +{ + let mut builder = test_codex() + .with_pre_build_hook(pre_build_hook) + .with_config(move |config| { + runtime.apply_to_config(config, approval_policy, sandbox_policy); + config.use_experimental_unified_exec_tool = true; + config + .features + .enable(Feature::UnifiedExec) + .expect("test config should allow feature update"); + }); + builder.build(server).await +} + fn find_test_zsh_path() -> Result> { let repo_root = codex_utils_cargo_bin::repo_root()?; let dotslash_zsh = repo_root.join("codex-rs/app-server/tests/suite/zsh"); diff --git a/codex-rs/core/tests/suite/approvals.rs b/codex-rs/core/tests/suite/approvals.rs index 9ca159965f..9bc8a9185b 100644 --- a/codex-rs/core/tests/suite/approvals.rs +++ b/codex-rs/core/tests/suite/approvals.rs @@ -35,6 +35,7 @@ use core_test_support::test_codex::TestCodex; use core_test_support::test_codex::test_codex; use core_test_support::wait_for_event; use core_test_support::wait_for_event_with_timeout; +use core_test_support::zsh_fork::build_unified_exec_zsh_fork_test; use core_test_support::zsh_fork::build_zsh_fork_test; use core_test_support::zsh_fork::restrictive_workspace_write_policy; use core_test_support::zsh_fork::zsh_fork_runtime; @@ -1985,6 +1986,158 @@ async fn approving_execpolicy_amendment_persists_policy_and_skips_future_prompts Ok(()) } +#[tokio::test(flavor = "multi_thread", worker_threads = 2)] +#[cfg(unix)] +async fn unified_exec_zsh_fork_execpolicy_amendment_skips_later_subcommands() -> Result<()> { + skip_if_no_network!(Ok(())); + + let Some(runtime) = zsh_fork_runtime("unified exec zsh-fork execpolicy amendment test")? else { + return Ok(()); + }; + + let approval_policy = AskForApproval::UnlessTrusted; + let sandbox_policy = SandboxPolicy::new_read_only_policy(); + let server = start_mock_server().await; + let test = build_unified_exec_zsh_fork_test( + &server, + runtime, + approval_policy, + sandbox_policy.clone(), + |_| {}, + ) + .await?; + let allow_prefix_path = test.cwd.path().join("allow-prefix-zsh-fork.txt"); + let _ = fs::remove_file(&allow_prefix_path); + + let call_id = "allow-prefix-zsh-fork"; + let command = "touch allow-prefix-zsh-fork.txt && touch allow-prefix-zsh-fork.txt"; + let event = exec_command_event( + call_id, + command, + Some(1_000), + SandboxPermissions::UseDefault, + None, + )?; + let _ = mount_sse_once( + &server, + sse(vec![ + ev_response_created("resp-zsh-fork-allow-prefix-1"), + event, + ev_completed("resp-zsh-fork-allow-prefix-1"), + ]), + ) + .await; + let results = mount_sse_once( + &server, + sse(vec![ + ev_assistant_message("msg-zsh-fork-allow-prefix-1", "done"), + ev_completed("resp-zsh-fork-allow-prefix-2"), + ]), + ) + .await; + + submit_turn( + &test, + "allow-prefix-zsh-fork", + approval_policy, + sandbox_policy, + ) + .await?; + + let expected_execpolicy_amendment = ExecPolicyAmendment::new(vec![ + "touch".to_string(), + "allow-prefix-zsh-fork.txt".to_string(), + ]); + let mut saw_parent_approval = false; + let mut saw_subcommand_approval = false; + loop { + let event = wait_for_event(&test.codex, |event| { + matches!( + event, + EventMsg::ExecApprovalRequest(_) | EventMsg::TurnComplete(_) + ) + }) + .await; + + match event { + EventMsg::TurnComplete(_) => break, + EventMsg::ExecApprovalRequest(approval) => { + let command_parts = approval.command.clone(); + let last_arg = command_parts.last().map(String::as_str).unwrap_or_default(); + if last_arg == command { + assert!( + !saw_parent_approval, + "unexpected duplicate parent approval: {command_parts:?}" + ); + saw_parent_approval = true; + test.codex + .submit(Op::ExecApproval { + id: approval.effective_approval_id(), + turn_id: None, + decision: ReviewDecision::Approved, + }) + .await?; + continue; + } + + let is_touch_subcommand = command_parts + .iter() + .any(|part| part == "allow-prefix-zsh-fork.txt") + && command_parts + .first() + .is_some_and(|part| part.ends_with("/touch") || part == "touch"); + if is_touch_subcommand { + assert!( + !saw_subcommand_approval, + "execpolicy amendment should suppress later matching subcommand approvals: {command_parts:?}" + ); + saw_subcommand_approval = true; + assert_eq!( + approval.proposed_execpolicy_amendment, + Some(expected_execpolicy_amendment.clone()) + ); + test.codex + .submit(Op::ExecApproval { + id: approval.effective_approval_id(), + turn_id: None, + decision: ReviewDecision::ApprovedExecpolicyAmendment { + proposed_execpolicy_amendment: expected_execpolicy_amendment + .clone(), + }, + }) + .await?; + continue; + } + + test.codex + .submit(Op::ExecApproval { + id: approval.effective_approval_id(), + turn_id: None, + decision: ReviewDecision::Approved, + }) + .await?; + } + other => panic!("unexpected event: {other:?}"), + } + } + + assert!(saw_parent_approval, "expected parent unified-exec approval"); + assert!( + saw_subcommand_approval, + "expected at least one intercepted touch approval" + ); + + let result = parse_result(&results.single_request().function_call_output(call_id)); + assert_eq!(result.exit_code.unwrap_or(0), 0); + assert!( + allow_prefix_path.exists(), + "expected touch command to complete after approving the first intercepted subcommand; output: {}", + result.stdout + ); + + Ok(()) +} + #[tokio::test(flavor = "multi_thread", worker_threads = 2)] #[cfg(unix)] async fn matched_prefix_rule_runs_unsandboxed_under_zsh_fork() -> Result<()> { diff --git a/codex-rs/core/tests/suite/skill_approval.rs b/codex-rs/core/tests/suite/skill_approval.rs index d81cbb5c66..6822791093 100644 --- a/codex-rs/core/tests/suite/skill_approval.rs +++ b/codex-rs/core/tests/suite/skill_approval.rs @@ -18,6 +18,7 @@ use core_test_support::skip_if_no_network; use core_test_support::test_codex::TestCodex; use core_test_support::wait_for_event; use core_test_support::wait_for_event_match; +use core_test_support::zsh_fork::build_unified_exec_zsh_fork_test; use core_test_support::zsh_fork::build_zsh_fork_test; use core_test_support::zsh_fork::restrictive_workspace_write_policy; use core_test_support::zsh_fork::zsh_fork_runtime; @@ -48,6 +49,13 @@ fn shell_command_arguments(command: &str) -> Result { }))?) } +fn exec_command_arguments(command: &str) -> Result { + Ok(serde_json::to_string(&json!({ + "cmd": command, + "yield_time_ms": 500, + }))?) +} + async fn submit_turn_with_policies( test: &TestCodex, prompt: &str, @@ -87,6 +95,38 @@ echo 'zsh-fork-stderr' >&2 ) } +#[cfg(unix)] +fn write_repo_skill_with_shell_script_contents( + repo_root: &Path, + name: &str, + script_name: &str, + script_contents: &str, +) -> Result { + use std::os::unix::fs::PermissionsExt; + + let skill_dir = repo_root.join(".agents").join("skills").join(name); + let scripts_dir = skill_dir.join("scripts"); + fs::create_dir_all(&scripts_dir)?; + fs::write(repo_root.join(".git"), "gitdir: here")?; + fs::write( + skill_dir.join("SKILL.md"), + format!( + r#"--- +name: {name} +description: {name} skill +--- +"# + ), + )?; + + let script_path = scripts_dir.join(script_name); + fs::write(&script_path, script_contents)?; + let mut permissions = fs::metadata(&script_path)?.permissions(); + permissions.set_mode(0o755); + fs::set_permissions(&script_path, permissions)?; + Ok(script_path) +} + #[cfg(unix)] fn write_skill_with_shell_script_contents( home: &Path, @@ -265,6 +305,168 @@ permissions: Ok(()) } +#[cfg(unix)] +#[tokio::test(flavor = "multi_thread", worker_threads = 2)] +async fn unified_exec_zsh_fork_prompts_for_skill_script_execution() -> Result<()> { + skip_if_no_network!(Ok(())); + + let Some(runtime) = zsh_fork_runtime("unified exec zsh-fork skill prompt test")? else { + return Ok(()); + }; + + let server = start_mock_server().await; + let tool_call_id = "uexec-zsh-fork-skill-call"; + let test = build_unified_exec_zsh_fork_test( + &server, + runtime, + AskForApproval::OnRequest, + SandboxPolicy::new_workspace_write_policy(), + |home| { + write_skill_with_shell_script(home, "mbolin-test-skill", "hello-mbolin.sh").unwrap(); + write_skill_metadata( + home, + "mbolin-test-skill", + r#" +permissions: + file_system: + read: + - "./data" + write: + - "./output" +"#, + ) + .unwrap(); + }, + ) + .await?; + + let (script_path_str, command) = skill_script_command(&test, "hello-mbolin.sh")?; + let arguments = exec_command_arguments(&command)?; + let mocks = + mount_function_call_agent_response(&server, tool_call_id, &arguments, "exec_command").await; + + submit_turn_with_policies( + &test, + "use $mbolin-test-skill", + AskForApproval::OnRequest, + SandboxPolicy::new_workspace_write_policy(), + ) + .await?; + + let approval = wait_for_exec_approval_request(&test) + .await + .expect("expected exec approval request before completion"); + assert_eq!(approval.call_id, tool_call_id); + assert_eq!(approval.command, vec![script_path_str.clone()]); + assert_eq!( + approval.available_decisions, + Some(vec![ + ReviewDecision::Approved, + ReviewDecision::ApprovedForSession, + ReviewDecision::Abort, + ]) + ); + assert_eq!( + approval.additional_permissions, + Some(PermissionProfile { + file_system: Some(FileSystemPermissions { + read: Some(vec![absolute_path( + &test.codex_home_path().join("skills/mbolin-test-skill/data"), + )]), + write: Some(vec![absolute_path( + &test + .codex_home_path() + .join("skills/mbolin-test-skill/output"), + )]), + }), + ..Default::default() + }) + ); + + test.codex + .submit(Op::ExecApproval { + id: approval.effective_approval_id(), + turn_id: None, + decision: ReviewDecision::Denied, + }) + .await?; + + wait_for_turn_complete(&test).await; + + let call_output = mocks + .completion + .single_request() + .function_call_output(tool_call_id); + let output = call_output["output"].as_str().unwrap_or_default(); + assert!( + output.contains("Execution denied: User denied execution"), + "expected rejection marker in function_call_output: {output:?}" + ); + + Ok(()) +} + +#[cfg(unix)] +#[tokio::test(flavor = "multi_thread", worker_threads = 2)] +async fn unified_exec_zsh_fork_keeps_skill_loading_pinned_to_turn_cwd() -> Result<()> { + skip_if_no_network!(Ok(())); + + let Some(runtime) = zsh_fork_runtime("unified exec zsh-fork turn cwd skill test")? else { + return Ok(()); + }; + + let server = start_mock_server().await; + let tool_call_id = "uexec-zsh-fork-repo-skill-call"; + let test = build_unified_exec_zsh_fork_test( + &server, + runtime, + AskForApproval::OnRequest, + SandboxPolicy::new_workspace_write_policy(), + |_| {}, + ) + .await?; + + let repo_root = test.cwd_path().join("repo"); + let script_path = write_repo_skill_with_shell_script_contents( + &repo_root, + "repo-skill", + "repo-skill.sh", + "#!/bin/sh\necho 'repo-skill-output'\n", + )?; + let script_path_quoted = shlex::try_join([script_path.to_string_lossy().as_ref()])?; + let repo_root_quoted = shlex::try_join([repo_root.to_string_lossy().as_ref()])?; + let command = format!("cd {repo_root_quoted} && {script_path_quoted}"); + let arguments = exec_command_arguments(&command)?; + let mocks = + mount_function_call_agent_response(&server, tool_call_id, &arguments, "exec_command").await; + + submit_turn_with_policies( + &test, + "run the repo skill after changing directories", + AskForApproval::OnRequest, + SandboxPolicy::new_workspace_write_policy(), + ) + .await?; + + let approval = wait_for_exec_approval_request(&test).await; + assert!( + approval.is_none(), + "changing directories inside unified exec should not load repo-local skills from the shell cwd", + ); + + let call_output = mocks + .completion + .single_request() + .function_call_output(tool_call_id); + let output = call_output["output"].as_str().unwrap_or_default(); + assert!( + output.contains("repo-skill-output"), + "expected repo skill script to run without skill-specific approval when only the shell cwd changes: {output:?}" + ); + + Ok(()) +} + /// Look for `additional_permissions == None`, then verify that both the first /// run and the cached session-approval rerun stay inside the turn sandbox. #[cfg(unix)]