mirror of
https://github.com/openai/codex.git
synced 2026-03-05 21:45:28 +03:00
refactor: route zsh-fork through unified exec
This commit is contained in:
@@ -11,7 +11,6 @@ use app_test_support::McpProcess;
|
||||
use app_test_support::create_final_assistant_message_sse_response;
|
||||
use app_test_support::create_mock_responses_server_sequence;
|
||||
use app_test_support::create_mock_responses_server_sequence_unchecked;
|
||||
use app_test_support::create_shell_command_sse_response;
|
||||
use app_test_support::to_response;
|
||||
use codex_app_server_protocol::CommandAction;
|
||||
use codex_app_server_protocol::CommandExecutionApprovalDecision;
|
||||
@@ -35,9 +34,11 @@ use codex_core::features::Feature;
|
||||
use core_test_support::responses;
|
||||
use core_test_support::skip_if_no_network;
|
||||
use pretty_assertions::assert_eq;
|
||||
use serde_json::json;
|
||||
use std::collections::BTreeMap;
|
||||
use std::path::Path;
|
||||
use tempfile::TempDir;
|
||||
use tokio::time::sleep;
|
||||
use tokio::time::timeout;
|
||||
|
||||
#[cfg(windows)]
|
||||
@@ -61,10 +62,8 @@ async fn turn_start_shell_zsh_fork_executes_command_v2() -> Result<()> {
|
||||
};
|
||||
eprintln!("using zsh path for zsh-fork test: {}", zsh_path.display());
|
||||
|
||||
let responses = vec![create_shell_command_sse_response(
|
||||
vec!["echo".to_string(), "hi".to_string()],
|
||||
None,
|
||||
Some(5000),
|
||||
let responses = vec![create_zsh_fork_exec_command_sse_response(
|
||||
"echo hi",
|
||||
"call-zsh-fork",
|
||||
)?];
|
||||
let server = create_mock_responses_server_sequence(responses).await;
|
||||
@@ -74,7 +73,7 @@ async fn turn_start_shell_zsh_fork_executes_command_v2() -> Result<()> {
|
||||
"never",
|
||||
&BTreeMap::from([
|
||||
(Feature::ShellZshFork, true),
|
||||
(Feature::UnifiedExec, false),
|
||||
(Feature::UnifiedExec, true),
|
||||
(Feature::ShellSnapshot, false),
|
||||
]),
|
||||
&zsh_path,
|
||||
@@ -172,14 +171,8 @@ async fn turn_start_shell_zsh_fork_exec_approval_decline_v2() -> Result<()> {
|
||||
eprintln!("using zsh path for zsh-fork test: {}", zsh_path.display());
|
||||
|
||||
let responses = vec![
|
||||
create_shell_command_sse_response(
|
||||
vec![
|
||||
"python3".to_string(),
|
||||
"-c".to_string(),
|
||||
"print(42)".to_string(),
|
||||
],
|
||||
None,
|
||||
Some(5000),
|
||||
create_zsh_fork_exec_command_sse_response(
|
||||
"python3 -c 'print(42)'",
|
||||
"call-zsh-fork-decline",
|
||||
)?,
|
||||
create_final_assistant_message_sse_response("done")?,
|
||||
@@ -191,7 +184,7 @@ async fn turn_start_shell_zsh_fork_exec_approval_decline_v2() -> Result<()> {
|
||||
"untrusted",
|
||||
&BTreeMap::from([
|
||||
(Feature::ShellZshFork, true),
|
||||
(Feature::UnifiedExec, false),
|
||||
(Feature::UnifiedExec, true),
|
||||
(Feature::ShellSnapshot, false),
|
||||
]),
|
||||
&zsh_path,
|
||||
@@ -307,14 +300,8 @@ async fn turn_start_shell_zsh_fork_exec_approval_cancel_v2() -> Result<()> {
|
||||
};
|
||||
eprintln!("using zsh path for zsh-fork test: {}", zsh_path.display());
|
||||
|
||||
let responses = vec![create_shell_command_sse_response(
|
||||
vec![
|
||||
"python3".to_string(),
|
||||
"-c".to_string(),
|
||||
"print(42)".to_string(),
|
||||
],
|
||||
None,
|
||||
Some(5000),
|
||||
let responses = vec![create_zsh_fork_exec_command_sse_response(
|
||||
"python3 -c 'print(42)'",
|
||||
"call-zsh-fork-cancel",
|
||||
)?];
|
||||
let server = create_mock_responses_server_sequence(responses).await;
|
||||
@@ -324,7 +311,7 @@ async fn turn_start_shell_zsh_fork_exec_approval_cancel_v2() -> Result<()> {
|
||||
"untrusted",
|
||||
&BTreeMap::from([
|
||||
(Feature::ShellZshFork, true),
|
||||
(Feature::UnifiedExec, false),
|
||||
(Feature::UnifiedExec, true),
|
||||
(Feature::ShellSnapshot, false),
|
||||
]),
|
||||
&zsh_path,
|
||||
@@ -422,6 +409,181 @@ async fn turn_start_shell_zsh_fork_exec_approval_cancel_v2() -> Result<()> {
|
||||
Ok(())
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn turn_start_shell_zsh_fork_interrupt_kills_approved_subcommand_v2() -> Result<()> {
|
||||
skip_if_no_network!(Ok(()));
|
||||
|
||||
let tmp = TempDir::new()?;
|
||||
let codex_home = tmp.path().join("codex_home");
|
||||
std::fs::create_dir(&codex_home)?;
|
||||
let workspace = tmp.path().join("workspace");
|
||||
std::fs::create_dir(&workspace)?;
|
||||
let pid_file = workspace.join("approved-subcommand.pid");
|
||||
let pid_file_display = pid_file.display().to_string();
|
||||
assert!(
|
||||
!pid_file_display.contains('\''),
|
||||
"test workspace path should not contain single quotes: {pid_file_display}"
|
||||
);
|
||||
|
||||
let Some(zsh_path) = find_test_zsh_path()? else {
|
||||
eprintln!("skipping zsh fork interrupt cleanup test: no zsh executable found");
|
||||
return Ok(());
|
||||
};
|
||||
if !supports_exec_wrapper_intercept(&zsh_path) {
|
||||
eprintln!(
|
||||
"skipping zsh fork interrupt cleanup test: zsh does not support EXEC_WRAPPER intercepts ({})",
|
||||
zsh_path.display()
|
||||
);
|
||||
return Ok(());
|
||||
}
|
||||
let zsh_path_display = zsh_path.display().to_string();
|
||||
eprintln!("using zsh path for zsh-fork test: {zsh_path_display}");
|
||||
|
||||
let shell_command =
|
||||
format!("/bin/sh -c 'echo $$ > \"{pid_file_display}\" && exec /bin/sleep 100'");
|
||||
let tool_call_arguments = serde_json::to_string(&json!({
|
||||
"cmd": shell_command,
|
||||
"yield_time_ms": 30_000,
|
||||
}))?;
|
||||
let response = responses::sse(vec![
|
||||
responses::ev_response_created("resp-1"),
|
||||
responses::ev_function_call(
|
||||
"call-zsh-fork-interrupt-cleanup",
|
||||
"exec_command",
|
||||
&tool_call_arguments,
|
||||
),
|
||||
responses::ev_completed("resp-1"),
|
||||
]);
|
||||
let no_op_response = responses::sse(vec![
|
||||
responses::ev_response_created("resp-2"),
|
||||
responses::ev_completed("resp-2"),
|
||||
]);
|
||||
let server =
|
||||
create_mock_responses_server_sequence_unchecked(vec![response, no_op_response]).await;
|
||||
create_config_toml(
|
||||
&codex_home,
|
||||
&server.uri(),
|
||||
"untrusted",
|
||||
&BTreeMap::from([
|
||||
(Feature::ShellZshFork, true),
|
||||
(Feature::UnifiedExec, true),
|
||||
(Feature::ShellSnapshot, false),
|
||||
]),
|
||||
&zsh_path,
|
||||
)?;
|
||||
|
||||
let mut mcp = create_zsh_test_mcp_process(&codex_home, &workspace).await?;
|
||||
timeout(DEFAULT_READ_TIMEOUT, mcp.initialize()).await??;
|
||||
|
||||
let start_id = mcp
|
||||
.send_thread_start_request(ThreadStartParams {
|
||||
model: Some("mock-model".to_string()),
|
||||
cwd: Some(workspace.to_string_lossy().into_owned()),
|
||||
..Default::default()
|
||||
})
|
||||
.await?;
|
||||
let start_resp: JSONRPCResponse = timeout(
|
||||
DEFAULT_READ_TIMEOUT,
|
||||
mcp.read_stream_until_response_message(RequestId::Integer(start_id)),
|
||||
)
|
||||
.await??;
|
||||
let ThreadStartResponse { thread, .. } = to_response::<ThreadStartResponse>(start_resp)?;
|
||||
|
||||
let turn_id = mcp
|
||||
.send_turn_start_request(TurnStartParams {
|
||||
thread_id: thread.id.clone(),
|
||||
input: vec![V2UserInput::Text {
|
||||
text: "run the long-lived command".to_string(),
|
||||
text_elements: Vec::new(),
|
||||
}],
|
||||
cwd: Some(workspace.clone()),
|
||||
approval_policy: Some(codex_app_server_protocol::AskForApproval::UnlessTrusted),
|
||||
sandbox_policy: Some(codex_app_server_protocol::SandboxPolicy::WorkspaceWrite {
|
||||
writable_roots: vec![workspace.clone().try_into()?],
|
||||
read_only_access: codex_app_server_protocol::ReadOnlyAccess::FullAccess,
|
||||
network_access: false,
|
||||
exclude_tmpdir_env_var: false,
|
||||
exclude_slash_tmp: false,
|
||||
}),
|
||||
model: Some("mock-model".to_string()),
|
||||
effort: Some(codex_protocol::openai_models::ReasoningEffort::Medium),
|
||||
summary: Some(codex_protocol::config_types::ReasoningSummary::Auto),
|
||||
..Default::default()
|
||||
})
|
||||
.await?;
|
||||
let turn_resp: JSONRPCResponse = timeout(
|
||||
DEFAULT_READ_TIMEOUT,
|
||||
mcp.read_stream_until_response_message(RequestId::Integer(turn_id)),
|
||||
)
|
||||
.await??;
|
||||
let TurnStartResponse { turn } = to_response::<TurnStartResponse>(turn_resp)?;
|
||||
|
||||
let mut saw_target_approval = false;
|
||||
while !saw_target_approval {
|
||||
let server_req = timeout(
|
||||
DEFAULT_READ_TIMEOUT,
|
||||
mcp.read_stream_until_request_message(),
|
||||
)
|
||||
.await??;
|
||||
let ServerRequest::CommandExecutionRequestApproval { request_id, params } = server_req
|
||||
else {
|
||||
panic!("expected CommandExecutionRequestApproval request");
|
||||
};
|
||||
let approval_command = params.command.clone().unwrap_or_default();
|
||||
saw_target_approval = approval_command.contains("/bin/sh")
|
||||
&& approval_command.contains(&pid_file_display)
|
||||
&& !approval_command.contains(&zsh_path_display);
|
||||
mcp.send_response(
|
||||
request_id,
|
||||
serde_json::to_value(CommandExecutionRequestApprovalResponse {
|
||||
decision: CommandExecutionApprovalDecision::Accept,
|
||||
})?,
|
||||
)
|
||||
.await?;
|
||||
}
|
||||
|
||||
let pid = timeout(DEFAULT_READ_TIMEOUT, async {
|
||||
loop {
|
||||
if let Ok(contents) = std::fs::read_to_string(&pid_file) {
|
||||
return Ok::<i32, anyhow::Error>(contents.trim().parse()?);
|
||||
}
|
||||
sleep(std::time::Duration::from_millis(20)).await;
|
||||
}
|
||||
})
|
||||
.await??;
|
||||
let still_running = std::process::Command::new("/bin/kill")
|
||||
.args(["-0", &pid.to_string()])
|
||||
.status()?
|
||||
.success();
|
||||
assert!(
|
||||
still_running,
|
||||
"expected approved intercepted subprocess pid {pid} to be running before interrupt"
|
||||
);
|
||||
|
||||
mcp.interrupt_turn_and_wait_for_aborted(
|
||||
thread.id.clone(),
|
||||
turn.id.clone(),
|
||||
DEFAULT_READ_TIMEOUT,
|
||||
)
|
||||
.await?;
|
||||
|
||||
timeout(DEFAULT_READ_TIMEOUT, async {
|
||||
loop {
|
||||
let still_running = std::process::Command::new("/bin/kill")
|
||||
.args(["-0", &pid.to_string()])
|
||||
.status()?
|
||||
.success();
|
||||
if !still_running {
|
||||
return Ok::<(), anyhow::Error>(());
|
||||
}
|
||||
sleep(std::time::Duration::from_millis(20)).await;
|
||||
}
|
||||
})
|
||||
.await??;
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn turn_start_shell_zsh_fork_subcommand_decline_marks_parent_declined_v2() -> Result<()> {
|
||||
skip_if_no_network!(Ok(()));
|
||||
@@ -453,16 +615,15 @@ async fn turn_start_shell_zsh_fork_subcommand_decline_marks_parent_declined_v2()
|
||||
first_file.display(),
|
||||
second_file.display()
|
||||
);
|
||||
let tool_call_arguments = serde_json::to_string(&serde_json::json!({
|
||||
"command": shell_command,
|
||||
"workdir": serde_json::Value::Null,
|
||||
"timeout_ms": 5000
|
||||
let tool_call_arguments = serde_json::to_string(&json!({
|
||||
"cmd": shell_command,
|
||||
"yield_time_ms": 5000,
|
||||
}))?;
|
||||
let response = responses::sse(vec![
|
||||
responses::ev_response_created("resp-1"),
|
||||
responses::ev_function_call(
|
||||
"call-zsh-fork-subcommand-decline",
|
||||
"shell_command",
|
||||
"exec_command",
|
||||
&tool_call_arguments,
|
||||
),
|
||||
responses::ev_completed("resp-1"),
|
||||
@@ -483,7 +644,7 @@ async fn turn_start_shell_zsh_fork_subcommand_decline_marks_parent_declined_v2()
|
||||
"untrusted",
|
||||
&BTreeMap::from([
|
||||
(Feature::ShellZshFork, true),
|
||||
(Feature::UnifiedExec, false),
|
||||
(Feature::UnifiedExec, true),
|
||||
(Feature::ShellSnapshot, false),
|
||||
]),
|
||||
&zsh_path,
|
||||
@@ -694,6 +855,21 @@ async fn create_zsh_test_mcp_process(codex_home: &Path, zdotdir: &Path) -> Resul
|
||||
McpProcess::new_with_env(codex_home, &[("ZDOTDIR", Some(zdotdir.as_str()))]).await
|
||||
}
|
||||
|
||||
fn create_zsh_fork_exec_command_sse_response(
|
||||
command: &str,
|
||||
call_id: &str,
|
||||
) -> anyhow::Result<String> {
|
||||
let tool_call_arguments = serde_json::to_string(&json!({
|
||||
"cmd": command,
|
||||
"yield_time_ms": 5000,
|
||||
}))?;
|
||||
Ok(responses::sse(vec![
|
||||
responses::ev_response_created("resp-1"),
|
||||
responses::ev_function_call(call_id, "exec_command", &tool_call_arguments),
|
||||
responses::ev_completed("resp-1"),
|
||||
]))
|
||||
}
|
||||
|
||||
fn create_config_toml(
|
||||
codex_home: &Path,
|
||||
server_uri: &str,
|
||||
|
||||
@@ -49,7 +49,6 @@ use std::collections::HashMap;
|
||||
use std::path::PathBuf;
|
||||
use std::sync::Arc;
|
||||
use std::time::Duration;
|
||||
use tokio::sync::RwLock;
|
||||
use tokio_util::sync::CancellationToken;
|
||||
use uuid::Uuid;
|
||||
|
||||
@@ -107,9 +106,6 @@ pub(super) async fn try_run_zsh_fork(
|
||||
req.timeout_ms
|
||||
.unwrap_or(crate::exec::DEFAULT_EXEC_COMMAND_TIMEOUT_MS),
|
||||
);
|
||||
let exec_policy = Arc::new(RwLock::new(
|
||||
ctx.session.services.exec_policy.current().as_ref().clone(),
|
||||
));
|
||||
let command_executor = CoreShellCommandExecutor {
|
||||
command,
|
||||
cwd: sandbox_cwd,
|
||||
@@ -154,7 +150,6 @@ pub(super) async fn try_run_zsh_fork(
|
||||
let stopwatch = Stopwatch::new(effective_timeout);
|
||||
let cancel_token = stopwatch.cancellation_token();
|
||||
let escalation_policy = CoreShellActionProvider {
|
||||
policy: Arc::clone(&exec_policy),
|
||||
session: Arc::clone(&ctx.session),
|
||||
turn: Arc::clone(&ctx.turn),
|
||||
call_id: ctx.call_id.clone(),
|
||||
@@ -214,9 +209,6 @@ pub(crate) async fn prepare_unified_exec_zsh_fork(
|
||||
return Ok(None);
|
||||
}
|
||||
|
||||
let exec_policy = Arc::new(RwLock::new(
|
||||
ctx.session.services.exec_policy.current().as_ref().clone(),
|
||||
));
|
||||
let command_executor = CoreShellCommandExecutor {
|
||||
command: exec_request.command.clone(),
|
||||
cwd: exec_request.cwd.clone(),
|
||||
@@ -249,7 +241,6 @@ pub(crate) async fn prepare_unified_exec_zsh_fork(
|
||||
)
|
||||
})?;
|
||||
let escalation_policy = CoreShellActionProvider {
|
||||
policy: Arc::clone(&exec_policy),
|
||||
session: Arc::clone(&ctx.session),
|
||||
turn: Arc::clone(&ctx.turn),
|
||||
call_id: ctx.call_id.clone(),
|
||||
@@ -277,7 +268,6 @@ pub(crate) async fn prepare_unified_exec_zsh_fork(
|
||||
}
|
||||
|
||||
struct CoreShellActionProvider {
|
||||
policy: Arc<RwLock<Policy>>,
|
||||
session: Arc<crate::codex::Session>,
|
||||
turn: Arc<crate::codex::TurnContext>,
|
||||
call_id: String,
|
||||
@@ -597,18 +587,16 @@ impl EscalationPolicy for CoreShellActionProvider {
|
||||
.await;
|
||||
}
|
||||
|
||||
let evaluation = {
|
||||
let policy = self.policy.read().await;
|
||||
evaluate_intercepted_exec_policy(
|
||||
&policy,
|
||||
program,
|
||||
argv,
|
||||
self.approval_policy,
|
||||
&self.sandbox_policy,
|
||||
self.sandbox_permissions,
|
||||
ENABLE_INTERCEPTED_EXEC_POLICY_SHELL_WRAPPER_PARSING,
|
||||
)
|
||||
};
|
||||
let policy = self.session.services.exec_policy.current();
|
||||
let evaluation = evaluate_intercepted_exec_policy(
|
||||
policy.as_ref(),
|
||||
program,
|
||||
argv,
|
||||
self.approval_policy,
|
||||
&self.sandbox_policy,
|
||||
self.sandbox_permissions,
|
||||
ENABLE_INTERCEPTED_EXEC_POLICY_SHELL_WRAPPER_PARSING,
|
||||
);
|
||||
// When true, means the Evaluation was due to *.rules, not the
|
||||
// fallback function.
|
||||
let decision_driven_by_policy =
|
||||
|
||||
@@ -115,8 +115,6 @@ impl ToolsConfig {
|
||||
|
||||
let shell_type = if !features.enabled(Feature::ShellTool) {
|
||||
ConfigShellToolType::Disabled
|
||||
} else if features.enabled(Feature::ShellZshFork) {
|
||||
ConfigShellToolType::ShellCommand
|
||||
} else if features.enabled(Feature::UnifiedExec) {
|
||||
// If ConPTY not supported (for old Windows versions), fallback on ShellCommand.
|
||||
if codex_utils_pty::conpty_supported() {
|
||||
@@ -124,6 +122,8 @@ impl ToolsConfig {
|
||||
} else {
|
||||
ConfigShellToolType::ShellCommand
|
||||
}
|
||||
} else if features.enabled(Feature::ShellZshFork) {
|
||||
ConfigShellToolType::ShellCommand
|
||||
} else {
|
||||
model_info.shell_type
|
||||
};
|
||||
@@ -2765,7 +2765,7 @@ mod tests {
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn shell_zsh_fork_prefers_shell_command_over_unified_exec() {
|
||||
fn shell_zsh_fork_uses_unified_exec_when_enabled() {
|
||||
let config = test_config();
|
||||
let model_info = ModelsManager::construct_model_info_offline_for_tests("o3", &config);
|
||||
let mut features = Features::with_defaults();
|
||||
@@ -2779,7 +2779,7 @@ mod tests {
|
||||
session_source: SessionSource::Cli,
|
||||
});
|
||||
|
||||
assert_eq!(tools_config.shell_type, ConfigShellToolType::ShellCommand);
|
||||
assert_eq!(tools_config.shell_type, ConfigShellToolType::UnifiedExec);
|
||||
assert_eq!(
|
||||
tools_config.shell_command_backend,
|
||||
ShellCommandBackendConfig::ZshFork
|
||||
|
||||
@@ -91,6 +91,29 @@ where
|
||||
builder.build(server).await
|
||||
}
|
||||
|
||||
pub async fn build_unified_exec_zsh_fork_test<F>(
|
||||
server: &wiremock::MockServer,
|
||||
runtime: ZshForkRuntime,
|
||||
approval_policy: AskForApproval,
|
||||
sandbox_policy: SandboxPolicy,
|
||||
pre_build_hook: F,
|
||||
) -> Result<TestCodex>
|
||||
where
|
||||
F: FnOnce(&Path) + Send + 'static,
|
||||
{
|
||||
let mut builder = test_codex()
|
||||
.with_pre_build_hook(pre_build_hook)
|
||||
.with_config(move |config| {
|
||||
runtime.apply_to_config(config, approval_policy, sandbox_policy);
|
||||
config.use_experimental_unified_exec_tool = true;
|
||||
config
|
||||
.features
|
||||
.enable(Feature::UnifiedExec)
|
||||
.expect("test config should allow feature update");
|
||||
});
|
||||
builder.build(server).await
|
||||
}
|
||||
|
||||
fn find_test_zsh_path() -> Result<Option<PathBuf>> {
|
||||
let repo_root = codex_utils_cargo_bin::repo_root()?;
|
||||
let dotslash_zsh = repo_root.join("codex-rs/app-server/tests/suite/zsh");
|
||||
|
||||
@@ -35,6 +35,7 @@ use core_test_support::test_codex::TestCodex;
|
||||
use core_test_support::test_codex::test_codex;
|
||||
use core_test_support::wait_for_event;
|
||||
use core_test_support::wait_for_event_with_timeout;
|
||||
use core_test_support::zsh_fork::build_unified_exec_zsh_fork_test;
|
||||
use core_test_support::zsh_fork::build_zsh_fork_test;
|
||||
use core_test_support::zsh_fork::restrictive_workspace_write_policy;
|
||||
use core_test_support::zsh_fork::zsh_fork_runtime;
|
||||
@@ -1985,6 +1986,158 @@ async fn approving_execpolicy_amendment_persists_policy_and_skips_future_prompts
|
||||
Ok(())
|
||||
}
|
||||
|
||||
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
|
||||
#[cfg(unix)]
|
||||
async fn unified_exec_zsh_fork_execpolicy_amendment_skips_later_subcommands() -> Result<()> {
|
||||
skip_if_no_network!(Ok(()));
|
||||
|
||||
let Some(runtime) = zsh_fork_runtime("unified exec zsh-fork execpolicy amendment test")? else {
|
||||
return Ok(());
|
||||
};
|
||||
|
||||
let approval_policy = AskForApproval::UnlessTrusted;
|
||||
let sandbox_policy = SandboxPolicy::new_read_only_policy();
|
||||
let server = start_mock_server().await;
|
||||
let test = build_unified_exec_zsh_fork_test(
|
||||
&server,
|
||||
runtime,
|
||||
approval_policy,
|
||||
sandbox_policy.clone(),
|
||||
|_| {},
|
||||
)
|
||||
.await?;
|
||||
let allow_prefix_path = test.cwd.path().join("allow-prefix-zsh-fork.txt");
|
||||
let _ = fs::remove_file(&allow_prefix_path);
|
||||
|
||||
let call_id = "allow-prefix-zsh-fork";
|
||||
let command = "touch allow-prefix-zsh-fork.txt && touch allow-prefix-zsh-fork.txt";
|
||||
let event = exec_command_event(
|
||||
call_id,
|
||||
command,
|
||||
Some(1_000),
|
||||
SandboxPermissions::UseDefault,
|
||||
None,
|
||||
)?;
|
||||
let _ = mount_sse_once(
|
||||
&server,
|
||||
sse(vec![
|
||||
ev_response_created("resp-zsh-fork-allow-prefix-1"),
|
||||
event,
|
||||
ev_completed("resp-zsh-fork-allow-prefix-1"),
|
||||
]),
|
||||
)
|
||||
.await;
|
||||
let results = mount_sse_once(
|
||||
&server,
|
||||
sse(vec![
|
||||
ev_assistant_message("msg-zsh-fork-allow-prefix-1", "done"),
|
||||
ev_completed("resp-zsh-fork-allow-prefix-2"),
|
||||
]),
|
||||
)
|
||||
.await;
|
||||
|
||||
submit_turn(
|
||||
&test,
|
||||
"allow-prefix-zsh-fork",
|
||||
approval_policy,
|
||||
sandbox_policy,
|
||||
)
|
||||
.await?;
|
||||
|
||||
let expected_execpolicy_amendment = ExecPolicyAmendment::new(vec![
|
||||
"touch".to_string(),
|
||||
"allow-prefix-zsh-fork.txt".to_string(),
|
||||
]);
|
||||
let mut saw_parent_approval = false;
|
||||
let mut saw_subcommand_approval = false;
|
||||
loop {
|
||||
let event = wait_for_event(&test.codex, |event| {
|
||||
matches!(
|
||||
event,
|
||||
EventMsg::ExecApprovalRequest(_) | EventMsg::TurnComplete(_)
|
||||
)
|
||||
})
|
||||
.await;
|
||||
|
||||
match event {
|
||||
EventMsg::TurnComplete(_) => break,
|
||||
EventMsg::ExecApprovalRequest(approval) => {
|
||||
let command_parts = approval.command.clone();
|
||||
let last_arg = command_parts.last().map(String::as_str).unwrap_or_default();
|
||||
if last_arg == command {
|
||||
assert!(
|
||||
!saw_parent_approval,
|
||||
"unexpected duplicate parent approval: {command_parts:?}"
|
||||
);
|
||||
saw_parent_approval = true;
|
||||
test.codex
|
||||
.submit(Op::ExecApproval {
|
||||
id: approval.effective_approval_id(),
|
||||
turn_id: None,
|
||||
decision: ReviewDecision::Approved,
|
||||
})
|
||||
.await?;
|
||||
continue;
|
||||
}
|
||||
|
||||
let is_touch_subcommand = command_parts
|
||||
.iter()
|
||||
.any(|part| part == "allow-prefix-zsh-fork.txt")
|
||||
&& command_parts
|
||||
.first()
|
||||
.is_some_and(|part| part.ends_with("/touch") || part == "touch");
|
||||
if is_touch_subcommand {
|
||||
assert!(
|
||||
!saw_subcommand_approval,
|
||||
"execpolicy amendment should suppress later matching subcommand approvals: {command_parts:?}"
|
||||
);
|
||||
saw_subcommand_approval = true;
|
||||
assert_eq!(
|
||||
approval.proposed_execpolicy_amendment,
|
||||
Some(expected_execpolicy_amendment.clone())
|
||||
);
|
||||
test.codex
|
||||
.submit(Op::ExecApproval {
|
||||
id: approval.effective_approval_id(),
|
||||
turn_id: None,
|
||||
decision: ReviewDecision::ApprovedExecpolicyAmendment {
|
||||
proposed_execpolicy_amendment: expected_execpolicy_amendment
|
||||
.clone(),
|
||||
},
|
||||
})
|
||||
.await?;
|
||||
continue;
|
||||
}
|
||||
|
||||
test.codex
|
||||
.submit(Op::ExecApproval {
|
||||
id: approval.effective_approval_id(),
|
||||
turn_id: None,
|
||||
decision: ReviewDecision::Approved,
|
||||
})
|
||||
.await?;
|
||||
}
|
||||
other => panic!("unexpected event: {other:?}"),
|
||||
}
|
||||
}
|
||||
|
||||
assert!(saw_parent_approval, "expected parent unified-exec approval");
|
||||
assert!(
|
||||
saw_subcommand_approval,
|
||||
"expected at least one intercepted touch approval"
|
||||
);
|
||||
|
||||
let result = parse_result(&results.single_request().function_call_output(call_id));
|
||||
assert_eq!(result.exit_code.unwrap_or(0), 0);
|
||||
assert!(
|
||||
allow_prefix_path.exists(),
|
||||
"expected touch command to complete after approving the first intercepted subcommand; output: {}",
|
||||
result.stdout
|
||||
);
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
|
||||
#[cfg(unix)]
|
||||
async fn matched_prefix_rule_runs_unsandboxed_under_zsh_fork() -> Result<()> {
|
||||
|
||||
@@ -18,6 +18,7 @@ use core_test_support::skip_if_no_network;
|
||||
use core_test_support::test_codex::TestCodex;
|
||||
use core_test_support::wait_for_event;
|
||||
use core_test_support::wait_for_event_match;
|
||||
use core_test_support::zsh_fork::build_unified_exec_zsh_fork_test;
|
||||
use core_test_support::zsh_fork::build_zsh_fork_test;
|
||||
use core_test_support::zsh_fork::restrictive_workspace_write_policy;
|
||||
use core_test_support::zsh_fork::zsh_fork_runtime;
|
||||
@@ -48,6 +49,13 @@ fn shell_command_arguments(command: &str) -> Result<String> {
|
||||
}))?)
|
||||
}
|
||||
|
||||
fn exec_command_arguments(command: &str) -> Result<String> {
|
||||
Ok(serde_json::to_string(&json!({
|
||||
"cmd": command,
|
||||
"yield_time_ms": 500,
|
||||
}))?)
|
||||
}
|
||||
|
||||
async fn submit_turn_with_policies(
|
||||
test: &TestCodex,
|
||||
prompt: &str,
|
||||
@@ -87,6 +95,38 @@ echo 'zsh-fork-stderr' >&2
|
||||
)
|
||||
}
|
||||
|
||||
#[cfg(unix)]
|
||||
fn write_repo_skill_with_shell_script_contents(
|
||||
repo_root: &Path,
|
||||
name: &str,
|
||||
script_name: &str,
|
||||
script_contents: &str,
|
||||
) -> Result<PathBuf> {
|
||||
use std::os::unix::fs::PermissionsExt;
|
||||
|
||||
let skill_dir = repo_root.join(".agents").join("skills").join(name);
|
||||
let scripts_dir = skill_dir.join("scripts");
|
||||
fs::create_dir_all(&scripts_dir)?;
|
||||
fs::write(repo_root.join(".git"), "gitdir: here")?;
|
||||
fs::write(
|
||||
skill_dir.join("SKILL.md"),
|
||||
format!(
|
||||
r#"---
|
||||
name: {name}
|
||||
description: {name} skill
|
||||
---
|
||||
"#
|
||||
),
|
||||
)?;
|
||||
|
||||
let script_path = scripts_dir.join(script_name);
|
||||
fs::write(&script_path, script_contents)?;
|
||||
let mut permissions = fs::metadata(&script_path)?.permissions();
|
||||
permissions.set_mode(0o755);
|
||||
fs::set_permissions(&script_path, permissions)?;
|
||||
Ok(script_path)
|
||||
}
|
||||
|
||||
#[cfg(unix)]
|
||||
fn write_skill_with_shell_script_contents(
|
||||
home: &Path,
|
||||
@@ -265,6 +305,168 @@ permissions:
|
||||
Ok(())
|
||||
}
|
||||
|
||||
#[cfg(unix)]
|
||||
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
|
||||
async fn unified_exec_zsh_fork_prompts_for_skill_script_execution() -> Result<()> {
|
||||
skip_if_no_network!(Ok(()));
|
||||
|
||||
let Some(runtime) = zsh_fork_runtime("unified exec zsh-fork skill prompt test")? else {
|
||||
return Ok(());
|
||||
};
|
||||
|
||||
let server = start_mock_server().await;
|
||||
let tool_call_id = "uexec-zsh-fork-skill-call";
|
||||
let test = build_unified_exec_zsh_fork_test(
|
||||
&server,
|
||||
runtime,
|
||||
AskForApproval::OnRequest,
|
||||
SandboxPolicy::new_workspace_write_policy(),
|
||||
|home| {
|
||||
write_skill_with_shell_script(home, "mbolin-test-skill", "hello-mbolin.sh").unwrap();
|
||||
write_skill_metadata(
|
||||
home,
|
||||
"mbolin-test-skill",
|
||||
r#"
|
||||
permissions:
|
||||
file_system:
|
||||
read:
|
||||
- "./data"
|
||||
write:
|
||||
- "./output"
|
||||
"#,
|
||||
)
|
||||
.unwrap();
|
||||
},
|
||||
)
|
||||
.await?;
|
||||
|
||||
let (script_path_str, command) = skill_script_command(&test, "hello-mbolin.sh")?;
|
||||
let arguments = exec_command_arguments(&command)?;
|
||||
let mocks =
|
||||
mount_function_call_agent_response(&server, tool_call_id, &arguments, "exec_command").await;
|
||||
|
||||
submit_turn_with_policies(
|
||||
&test,
|
||||
"use $mbolin-test-skill",
|
||||
AskForApproval::OnRequest,
|
||||
SandboxPolicy::new_workspace_write_policy(),
|
||||
)
|
||||
.await?;
|
||||
|
||||
let approval = wait_for_exec_approval_request(&test)
|
||||
.await
|
||||
.expect("expected exec approval request before completion");
|
||||
assert_eq!(approval.call_id, tool_call_id);
|
||||
assert_eq!(approval.command, vec![script_path_str.clone()]);
|
||||
assert_eq!(
|
||||
approval.available_decisions,
|
||||
Some(vec![
|
||||
ReviewDecision::Approved,
|
||||
ReviewDecision::ApprovedForSession,
|
||||
ReviewDecision::Abort,
|
||||
])
|
||||
);
|
||||
assert_eq!(
|
||||
approval.additional_permissions,
|
||||
Some(PermissionProfile {
|
||||
file_system: Some(FileSystemPermissions {
|
||||
read: Some(vec![absolute_path(
|
||||
&test.codex_home_path().join("skills/mbolin-test-skill/data"),
|
||||
)]),
|
||||
write: Some(vec![absolute_path(
|
||||
&test
|
||||
.codex_home_path()
|
||||
.join("skills/mbolin-test-skill/output"),
|
||||
)]),
|
||||
}),
|
||||
..Default::default()
|
||||
})
|
||||
);
|
||||
|
||||
test.codex
|
||||
.submit(Op::ExecApproval {
|
||||
id: approval.effective_approval_id(),
|
||||
turn_id: None,
|
||||
decision: ReviewDecision::Denied,
|
||||
})
|
||||
.await?;
|
||||
|
||||
wait_for_turn_complete(&test).await;
|
||||
|
||||
let call_output = mocks
|
||||
.completion
|
||||
.single_request()
|
||||
.function_call_output(tool_call_id);
|
||||
let output = call_output["output"].as_str().unwrap_or_default();
|
||||
assert!(
|
||||
output.contains("Execution denied: User denied execution"),
|
||||
"expected rejection marker in function_call_output: {output:?}"
|
||||
);
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
#[cfg(unix)]
|
||||
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
|
||||
async fn unified_exec_zsh_fork_keeps_skill_loading_pinned_to_turn_cwd() -> Result<()> {
|
||||
skip_if_no_network!(Ok(()));
|
||||
|
||||
let Some(runtime) = zsh_fork_runtime("unified exec zsh-fork turn cwd skill test")? else {
|
||||
return Ok(());
|
||||
};
|
||||
|
||||
let server = start_mock_server().await;
|
||||
let tool_call_id = "uexec-zsh-fork-repo-skill-call";
|
||||
let test = build_unified_exec_zsh_fork_test(
|
||||
&server,
|
||||
runtime,
|
||||
AskForApproval::OnRequest,
|
||||
SandboxPolicy::new_workspace_write_policy(),
|
||||
|_| {},
|
||||
)
|
||||
.await?;
|
||||
|
||||
let repo_root = test.cwd_path().join("repo");
|
||||
let script_path = write_repo_skill_with_shell_script_contents(
|
||||
&repo_root,
|
||||
"repo-skill",
|
||||
"repo-skill.sh",
|
||||
"#!/bin/sh\necho 'repo-skill-output'\n",
|
||||
)?;
|
||||
let script_path_quoted = shlex::try_join([script_path.to_string_lossy().as_ref()])?;
|
||||
let repo_root_quoted = shlex::try_join([repo_root.to_string_lossy().as_ref()])?;
|
||||
let command = format!("cd {repo_root_quoted} && {script_path_quoted}");
|
||||
let arguments = exec_command_arguments(&command)?;
|
||||
let mocks =
|
||||
mount_function_call_agent_response(&server, tool_call_id, &arguments, "exec_command").await;
|
||||
|
||||
submit_turn_with_policies(
|
||||
&test,
|
||||
"run the repo skill after changing directories",
|
||||
AskForApproval::OnRequest,
|
||||
SandboxPolicy::new_workspace_write_policy(),
|
||||
)
|
||||
.await?;
|
||||
|
||||
let approval = wait_for_exec_approval_request(&test).await;
|
||||
assert!(
|
||||
approval.is_none(),
|
||||
"changing directories inside unified exec should not load repo-local skills from the shell cwd",
|
||||
);
|
||||
|
||||
let call_output = mocks
|
||||
.completion
|
||||
.single_request()
|
||||
.function_call_output(tool_call_id);
|
||||
let output = call_output["output"].as_str().unwrap_or_default();
|
||||
assert!(
|
||||
output.contains("repo-skill-output"),
|
||||
"expected repo skill script to run without skill-specific approval when only the shell cwd changes: {output:?}"
|
||||
);
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// Look for `additional_permissions == None`, then verify that both the first
|
||||
/// run and the cached session-approval rerun stay inside the turn sandbox.
|
||||
#[cfg(unix)]
|
||||
|
||||
Reference in New Issue
Block a user