#[cfg(unix)] use std::os::unix::process::ExitStatusExt; use std::collections::HashMap; use std::io; use std::path::Path; use std::path::PathBuf; use std::process::ExitStatus; use std::time::Duration; use std::time::Instant; use async_channel::Sender; use tokio::io::AsyncRead; use tokio::io::AsyncReadExt; use tokio::io::BufReader; use tokio::process::Child; use tokio_util::sync::CancellationToken; use crate::error::CodexErr; use crate::error::Result; use crate::error::SandboxErr; use crate::get_platform_sandbox; use crate::protocol::Event; use crate::protocol::EventMsg; use crate::protocol::ExecCommandOutputDeltaEvent; use crate::protocol::ExecOutputStream; use crate::protocol::SandboxPolicy; use crate::sandboxing::CommandSpec; use crate::sandboxing::ExecRequest; use crate::sandboxing::SandboxManager; use crate::sandboxing::SandboxPermissions; use crate::spawn::SpawnChildRequest; use crate::spawn::StdioPolicy; use crate::spawn::spawn_child_async; use crate::text_encoding::bytes_to_string_smart; use codex_network_proxy::NetworkProxy; use codex_utils_pty::process_group::kill_child_process_group; pub const DEFAULT_EXEC_COMMAND_TIMEOUT_MS: u64 = 10_000; // Hardcode these since it does not seem worth including the libc crate just // for these. const SIGKILL_CODE: i32 = 9; const TIMEOUT_CODE: i32 = 64; const EXIT_CODE_SIGNAL_BASE: i32 = 128; // conventional shell: 128 + signal const EXEC_TIMEOUT_EXIT_CODE: i32 = 124; // conventional timeout exit code // I/O buffer sizing const READ_CHUNK_SIZE: usize = 8192; // bytes per read const AGGREGATE_BUFFER_INITIAL_CAPACITY: usize = 8 * 1024; // 8 KiB /// Hard cap on bytes retained from exec stdout/stderr/aggregated output. /// /// This mirrors unified exec's output cap so a single runaway command cannot /// OOM the process by dumping huge amounts of data to stdout/stderr. const EXEC_OUTPUT_MAX_BYTES: usize = 1024 * 1024; // 1 MiB /// Limit the number of ExecCommandOutputDelta events emitted per exec call. /// Aggregation still collects full output; only the live event stream is capped. pub(crate) const MAX_EXEC_OUTPUT_DELTAS_PER_CALL: usize = 10_000; #[derive(Debug)] pub struct ExecParams { pub command: Vec, pub cwd: PathBuf, pub expiration: ExecExpiration, pub env: HashMap, pub network: Option, pub sandbox_permissions: SandboxPermissions, pub windows_sandbox_level: codex_protocol::config_types::WindowsSandboxLevel, pub justification: Option, pub arg0: Option, } /// Mechanism to terminate an exec invocation before it finishes naturally. #[derive(Clone, Debug)] pub enum ExecExpiration { Timeout(Duration), DefaultTimeout, Cancellation(CancellationToken), } impl From> for ExecExpiration { fn from(timeout_ms: Option) -> Self { timeout_ms.map_or(ExecExpiration::DefaultTimeout, |timeout_ms| { ExecExpiration::Timeout(Duration::from_millis(timeout_ms)) }) } } impl From for ExecExpiration { fn from(timeout_ms: u64) -> Self { ExecExpiration::Timeout(Duration::from_millis(timeout_ms)) } } impl ExecExpiration { pub(crate) async fn wait(self) { match self { ExecExpiration::Timeout(duration) => tokio::time::sleep(duration).await, ExecExpiration::DefaultTimeout => { tokio::time::sleep(Duration::from_millis(DEFAULT_EXEC_COMMAND_TIMEOUT_MS)).await } ExecExpiration::Cancellation(cancel) => { cancel.cancelled().await; } } } /// If ExecExpiration is a timeout, returns the timeout in milliseconds. pub(crate) fn timeout_ms(&self) -> Option { match self { ExecExpiration::Timeout(duration) => Some(duration.as_millis() as u64), ExecExpiration::DefaultTimeout => Some(DEFAULT_EXEC_COMMAND_TIMEOUT_MS), ExecExpiration::Cancellation(_) => None, } } } #[derive(Clone, Copy, Debug, PartialEq)] pub enum SandboxType { None, /// Only available on macOS. MacosSeatbelt, /// Only available on Linux. LinuxSeccomp, /// Only available on Windows. WindowsRestrictedToken, } impl SandboxType { pub(crate) fn as_metric_tag(self) -> &'static str { match self { SandboxType::None => "none", SandboxType::MacosSeatbelt => "seatbelt", SandboxType::LinuxSeccomp => "seccomp", SandboxType::WindowsRestrictedToken => "windows_sandbox", } } } #[derive(Clone)] pub struct StdoutStream { pub sub_id: String, pub call_id: String, pub tx_event: Sender, } pub async fn process_exec_tool_call( params: ExecParams, sandbox_policy: &SandboxPolicy, sandbox_cwd: &Path, codex_linux_sandbox_exe: &Option, use_linux_sandbox_bwrap: bool, stdout_stream: Option, ) -> Result { let windows_sandbox_level = params.windows_sandbox_level; let enforce_managed_network = params.network.is_some(); let sandbox_type = match &sandbox_policy { SandboxPolicy::DangerFullAccess | SandboxPolicy::ExternalSandbox { .. } => { if enforce_managed_network { get_platform_sandbox( windows_sandbox_level != codex_protocol::config_types::WindowsSandboxLevel::Disabled, ) .unwrap_or(SandboxType::None) } else { SandboxType::None } } _ => get_platform_sandbox( windows_sandbox_level != codex_protocol::config_types::WindowsSandboxLevel::Disabled, ) .unwrap_or(SandboxType::None), }; tracing::debug!("Sandbox type: {sandbox_type:?}"); let ExecParams { command, cwd, mut env, expiration, network, sandbox_permissions, windows_sandbox_level, justification, arg0: _, } = params; if let Some(network) = network.as_ref() { network.apply_to_env(&mut env); } let (program, args) = command.split_first().ok_or_else(|| { CodexErr::Io(io::Error::new( io::ErrorKind::InvalidInput, "command args are empty", )) })?; let spec = CommandSpec { program: program.clone(), args: args.to_vec(), cwd, env, expiration, sandbox_permissions, additional_permissions: None, justification, }; let manager = SandboxManager::new(); let exec_req = manager .transform(crate::sandboxing::SandboxTransformRequest { spec, policy: sandbox_policy, sandbox: sandbox_type, enforce_managed_network, network: network.as_ref(), sandbox_policy_cwd: sandbox_cwd, #[cfg(target_os = "macos")] macos_seatbelt_profile_extensions: None, codex_linux_sandbox_exe: codex_linux_sandbox_exe.as_ref(), use_linux_sandbox_bwrap, windows_sandbox_level, }) .map_err(CodexErr::from)?; // Route through the sandboxing module for a single, unified execution path. crate::sandboxing::execute_env(exec_req, stdout_stream).await } pub(crate) async fn execute_exec_env( env: ExecRequest, sandbox_policy: &SandboxPolicy, stdout_stream: Option, ) -> Result { let ExecRequest { command, cwd, env, network, expiration, sandbox, windows_sandbox_level, sandbox_permissions, sandbox_policy: _sandbox_policy_from_env, justification, arg0, } = env; let params = ExecParams { command, cwd, expiration, env, network: network.clone(), sandbox_permissions, windows_sandbox_level, justification, arg0, }; let start = Instant::now(); let raw_output_result = exec(params, sandbox, sandbox_policy, stdout_stream).await; let duration = start.elapsed(); finalize_exec_result(raw_output_result, sandbox, duration) } #[cfg(target_os = "windows")] fn extract_create_process_as_user_error_code(err: &str) -> Option { let marker = "CreateProcessAsUserW failed: "; let start = err.find(marker)? + marker.len(); let tail = &err[start..]; let digits: String = tail.chars().take_while(char::is_ascii_digit).collect(); if digits.is_empty() { None } else { Some(digits) } } #[cfg(target_os = "windows")] fn windowsapps_path_kind(path: &str) -> &'static str { let lower = path.to_ascii_lowercase(); if lower.contains("\\program files\\windowsapps\\") { return "windowsapps_package"; } if lower.contains("\\appdata\\local\\microsoft\\windowsapps\\") { return "windowsapps_alias"; } if lower.contains("\\windowsapps\\") { return "windowsapps_other"; } "other" } #[cfg(target_os = "windows")] fn record_windows_sandbox_spawn_failure( command_path: Option<&str>, windows_sandbox_level: codex_protocol::config_types::WindowsSandboxLevel, err: &str, ) { let Some(error_code) = extract_create_process_as_user_error_code(err) else { return; }; let path = command_path.unwrap_or("unknown"); let exe = Path::new(path) .file_name() .and_then(|name| name.to_str()) .unwrap_or("unknown") .to_ascii_lowercase(); let path_kind = windowsapps_path_kind(path); let level = if matches!( windows_sandbox_level, codex_protocol::config_types::WindowsSandboxLevel::Elevated ) { "elevated" } else { "legacy" }; if let Some(metrics) = codex_otel::metrics::global() { let _ = metrics.counter( "codex.windows_sandbox.createprocessasuserw_failed", 1, &[ ("error_code", error_code.as_str()), ("path_kind", path_kind), ("exe", exe.as_str()), ("level", level), ], ); } } #[cfg(target_os = "windows")] async fn exec_windows_sandbox( params: ExecParams, sandbox_policy: &SandboxPolicy, ) -> Result { use crate::config::find_codex_home; use codex_protocol::config_types::WindowsSandboxLevel; use codex_windows_sandbox::run_windows_sandbox_capture; use codex_windows_sandbox::run_windows_sandbox_capture_elevated; let ExecParams { command, cwd, mut env, network, expiration, windows_sandbox_level, .. } = params; if let Some(network) = network.as_ref() { network.apply_to_env(&mut env); } // TODO(iceweasel-oai): run_windows_sandbox_capture should support all // variants of ExecExpiration, not just timeout. let timeout_ms = expiration.timeout_ms(); let policy_str = serde_json::to_string(sandbox_policy).map_err(|err| { CodexErr::Io(io::Error::other(format!( "failed to serialize Windows sandbox policy: {err}" ))) })?; let sandbox_cwd = cwd.clone(); let codex_home = find_codex_home().map_err(|err| { CodexErr::Io(io::Error::other(format!( "windows sandbox: failed to resolve codex_home: {err}" ))) })?; let command_path = command.first().cloned(); let sandbox_level = windows_sandbox_level; let use_elevated = matches!(sandbox_level, WindowsSandboxLevel::Elevated); let spawn_res = tokio::task::spawn_blocking(move || { if use_elevated { run_windows_sandbox_capture_elevated( policy_str.as_str(), &sandbox_cwd, codex_home.as_ref(), command, &cwd, env, timeout_ms, ) } else { run_windows_sandbox_capture( policy_str.as_str(), &sandbox_cwd, codex_home.as_ref(), command, &cwd, env, timeout_ms, ) } }) .await; let capture = match spawn_res { Ok(Ok(v)) => v, Ok(Err(err)) => { record_windows_sandbox_spawn_failure( command_path.as_deref(), sandbox_level, &err.to_string(), ); return Err(CodexErr::Io(io::Error::other(format!( "windows sandbox: {err}" )))); } Err(join_err) => { return Err(CodexErr::Io(io::Error::other(format!( "windows sandbox join error: {join_err}" )))); } }; let exit_status = synthetic_exit_status(capture.exit_code); let mut stdout_text = capture.stdout; if stdout_text.len() > EXEC_OUTPUT_MAX_BYTES { stdout_text.truncate(EXEC_OUTPUT_MAX_BYTES); } let mut stderr_text = capture.stderr; if stderr_text.len() > EXEC_OUTPUT_MAX_BYTES { stderr_text.truncate(EXEC_OUTPUT_MAX_BYTES); } let stdout = StreamOutput { text: stdout_text, truncated_after_lines: None, }; let stderr = StreamOutput { text: stderr_text, truncated_after_lines: None, }; let aggregated_output = aggregate_output(&stdout, &stderr); Ok(RawExecToolCallOutput { exit_status, stdout, stderr, aggregated_output, timed_out: capture.timed_out, }) } fn finalize_exec_result( raw_output_result: std::result::Result, sandbox_type: SandboxType, duration: Duration, ) -> Result { match raw_output_result { Ok(raw_output) => { #[allow(unused_mut)] let mut timed_out = raw_output.timed_out; #[cfg(target_family = "unix")] { if let Some(signal) = raw_output.exit_status.signal() { if signal == TIMEOUT_CODE { timed_out = true; } else { return Err(CodexErr::Sandbox(SandboxErr::Signal(signal))); } } } let mut exit_code = raw_output.exit_status.code().unwrap_or(-1); if timed_out { exit_code = EXEC_TIMEOUT_EXIT_CODE; } let stdout = raw_output.stdout.from_utf8_lossy(); let stderr = raw_output.stderr.from_utf8_lossy(); let aggregated_output = raw_output.aggregated_output.from_utf8_lossy(); let exec_output = ExecToolCallOutput { exit_code, stdout, stderr, aggregated_output, duration, timed_out, }; if timed_out { return Err(CodexErr::Sandbox(SandboxErr::Timeout { output: Box::new(exec_output), })); } if is_likely_sandbox_denied(sandbox_type, &exec_output) { return Err(CodexErr::Sandbox(SandboxErr::Denied { output: Box::new(exec_output), network_policy_decision: None, })); } Ok(exec_output) } Err(err) => { tracing::error!("exec error: {err}"); Err(err) } } } pub(crate) mod errors { use super::CodexErr; use crate::sandboxing::SandboxTransformError; impl From for CodexErr { fn from(err: SandboxTransformError) -> Self { match err { SandboxTransformError::MissingLinuxSandboxExecutable => { CodexErr::LandlockSandboxExecutableNotProvided } #[cfg(not(target_os = "macos"))] SandboxTransformError::SeatbeltUnavailable => CodexErr::UnsupportedOperation( "seatbelt sandbox is only available on macOS".to_string(), ), } } } } /// We don't have a fully deterministic way to tell if our command failed /// because of the sandbox - a command in the user's zshrc file might hit an /// error, but the command itself might fail or succeed for other reasons. /// For now, we conservatively check for well known command failure exit codes and /// also look for common sandbox denial keywords in the command output. pub(crate) fn is_likely_sandbox_denied( sandbox_type: SandboxType, exec_output: &ExecToolCallOutput, ) -> bool { if sandbox_type == SandboxType::None || exec_output.exit_code == 0 { return false; } // Quick rejects: well-known non-sandbox shell exit codes // 2: misuse of shell builtins // 126: permission denied // 127: command not found const SANDBOX_DENIED_KEYWORDS: [&str; 7] = [ "operation not permitted", "permission denied", "read-only file system", "seccomp", "sandbox", "landlock", "failed to write file", ]; let has_sandbox_keyword = [ &exec_output.stderr.text, &exec_output.stdout.text, &exec_output.aggregated_output.text, ] .into_iter() .any(|section| { let lower = section.to_lowercase(); SANDBOX_DENIED_KEYWORDS .iter() .any(|needle| lower.contains(needle)) }); if has_sandbox_keyword { return true; } const QUICK_REJECT_EXIT_CODES: [i32; 3] = [2, 126, 127]; if QUICK_REJECT_EXIT_CODES.contains(&exec_output.exit_code) { return false; } #[cfg(unix)] { const SIGSYS_CODE: i32 = libc::SIGSYS; if sandbox_type == SandboxType::LinuxSeccomp && exec_output.exit_code == EXIT_CODE_SIGNAL_BASE + SIGSYS_CODE { return true; } } false } #[derive(Debug, Clone)] pub struct StreamOutput { pub text: T, pub truncated_after_lines: Option, } #[derive(Debug)] struct RawExecToolCallOutput { pub exit_status: ExitStatus, pub stdout: StreamOutput>, pub stderr: StreamOutput>, pub aggregated_output: StreamOutput>, pub timed_out: bool, } impl StreamOutput { pub fn new(text: String) -> Self { Self { text, truncated_after_lines: None, } } } impl StreamOutput> { pub fn from_utf8_lossy(&self) -> StreamOutput { StreamOutput { text: bytes_to_string_smart(&self.text), truncated_after_lines: self.truncated_after_lines, } } } #[inline] fn append_capped(dst: &mut Vec, src: &[u8], max_bytes: usize) { if dst.len() >= max_bytes { return; } let remaining = max_bytes.saturating_sub(dst.len()); let take = remaining.min(src.len()); dst.extend_from_slice(&src[..take]); } fn aggregate_output( stdout: &StreamOutput>, stderr: &StreamOutput>, ) -> StreamOutput> { let total_len = stdout.text.len().saturating_add(stderr.text.len()); let max_bytes = EXEC_OUTPUT_MAX_BYTES; let mut aggregated = Vec::with_capacity(total_len.min(max_bytes)); if total_len <= max_bytes { aggregated.extend_from_slice(&stdout.text); aggregated.extend_from_slice(&stderr.text); return StreamOutput { text: aggregated, truncated_after_lines: None, }; } // Under contention, reserve 1/3 for stdout and 2/3 for stderr; rebalance unused stderr to stdout. let want_stdout = stdout.text.len().min(max_bytes / 3); let want_stderr = stderr.text.len(); let stderr_take = want_stderr.min(max_bytes.saturating_sub(want_stdout)); let remaining = max_bytes.saturating_sub(want_stdout + stderr_take); let stdout_take = want_stdout + remaining.min(stdout.text.len().saturating_sub(want_stdout)); aggregated.extend_from_slice(&stdout.text[..stdout_take]); aggregated.extend_from_slice(&stderr.text[..stderr_take]); StreamOutput { text: aggregated, truncated_after_lines: None, } } #[derive(Clone, Debug)] pub struct ExecToolCallOutput { pub exit_code: i32, pub stdout: StreamOutput, pub stderr: StreamOutput, pub aggregated_output: StreamOutput, pub duration: Duration, pub timed_out: bool, } impl Default for ExecToolCallOutput { fn default() -> Self { Self { exit_code: 0, stdout: StreamOutput::new(String::new()), stderr: StreamOutput::new(String::new()), aggregated_output: StreamOutput::new(String::new()), duration: Duration::ZERO, timed_out: false, } } } #[cfg_attr(not(target_os = "windows"), allow(unused_variables))] async fn exec( params: ExecParams, sandbox: SandboxType, sandbox_policy: &SandboxPolicy, stdout_stream: Option, ) -> Result { #[cfg(target_os = "windows")] if sandbox == SandboxType::WindowsRestrictedToken && !matches!( sandbox_policy, SandboxPolicy::DangerFullAccess | SandboxPolicy::ExternalSandbox { .. } ) { return exec_windows_sandbox(params, sandbox_policy).await; } let ExecParams { command, cwd, mut env, network, arg0, expiration, windows_sandbox_level: _, .. } = params; if let Some(network) = network.as_ref() { network.apply_to_env(&mut env); } let (program, args) = command.split_first().ok_or_else(|| { CodexErr::Io(io::Error::new( io::ErrorKind::InvalidInput, "command args are empty", )) })?; let arg0_ref = arg0.as_deref(); let child = spawn_child_async(SpawnChildRequest { program: PathBuf::from(program), args: args.into(), arg0: arg0_ref, cwd, sandbox_policy, // The environment already has attempt-scoped proxy settings from // apply_to_env_for_attempt above. Passing network here would reapply // non-attempt proxy vars and drop attempt correlation metadata. network: None, stdio_policy: StdioPolicy::RedirectForShellTool, env, }) .await?; consume_truncated_output(child, expiration, stdout_stream).await } /// Consumes the output of a child process, truncating it so it is suitable for /// use as the output of a `shell` tool call. Also enforces specified timeout. async fn consume_truncated_output( mut child: Child, expiration: ExecExpiration, stdout_stream: Option, ) -> Result { // Both stdout and stderr were configured with `Stdio::piped()` // above, therefore `take()` should normally return `Some`. If it doesn't // we treat it as an exceptional I/O error let stdout_reader = child.stdout.take().ok_or_else(|| { CodexErr::Io(io::Error::other( "stdout pipe was unexpectedly not available", )) })?; let stderr_reader = child.stderr.take().ok_or_else(|| { CodexErr::Io(io::Error::other( "stderr pipe was unexpectedly not available", )) })?; let stdout_handle = tokio::spawn(read_capped( BufReader::new(stdout_reader), stdout_stream.clone(), false, )); let stderr_handle = tokio::spawn(read_capped( BufReader::new(stderr_reader), stdout_stream.clone(), true, )); let (exit_status, timed_out) = tokio::select! { status_result = child.wait() => { let exit_status = status_result?; (exit_status, false) } _ = expiration.wait() => { kill_child_process_group(&mut child)?; child.start_kill()?; (synthetic_exit_status(EXIT_CODE_SIGNAL_BASE + TIMEOUT_CODE), true) } _ = tokio::signal::ctrl_c() => { kill_child_process_group(&mut child)?; child.start_kill()?; (synthetic_exit_status(EXIT_CODE_SIGNAL_BASE + SIGKILL_CODE), false) } }; // Wait for the stdout/stderr collection tasks but guard against them // hanging forever. In the normal case, both pipes are closed once the child // terminates so the tasks exit quickly. However, if the child process // spawned grandchildren that inherited its stdout/stderr file descriptors // those pipes may stay open after we `kill` the direct child on timeout. // That would cause the `read_capped` tasks to block on `read()` // indefinitely, effectively hanging the whole agent. const IO_DRAIN_TIMEOUT_MS: u64 = 2_000; // 2 s should be plenty for local pipes // We need mutable bindings so we can `abort()` them on timeout. use tokio::task::JoinHandle; async fn await_with_timeout( handle: &mut JoinHandle>>>, timeout: Duration, ) -> std::io::Result>> { match tokio::time::timeout(timeout, &mut *handle).await { Ok(join_res) => match join_res { Ok(io_res) => io_res, Err(join_err) => Err(std::io::Error::other(join_err)), }, Err(_elapsed) => { // Timeout: abort the task to avoid hanging on open pipes. handle.abort(); Ok(StreamOutput { text: Vec::new(), truncated_after_lines: None, }) } } } let mut stdout_handle = stdout_handle; let mut stderr_handle = stderr_handle; let stdout = await_with_timeout( &mut stdout_handle, Duration::from_millis(IO_DRAIN_TIMEOUT_MS), ) .await?; let stderr = await_with_timeout( &mut stderr_handle, Duration::from_millis(IO_DRAIN_TIMEOUT_MS), ) .await?; let aggregated_output = aggregate_output(&stdout, &stderr); Ok(RawExecToolCallOutput { exit_status, stdout, stderr, aggregated_output, timed_out, }) } async fn read_capped( mut reader: R, stream: Option, is_stderr: bool, ) -> io::Result>> { let mut buf = Vec::with_capacity(AGGREGATE_BUFFER_INITIAL_CAPACITY.min(EXEC_OUTPUT_MAX_BYTES)); let mut tmp = [0u8; READ_CHUNK_SIZE]; let mut emitted_deltas: usize = 0; loop { let n = reader.read(&mut tmp).await?; if n == 0 { break; } if let Some(stream) = &stream && emitted_deltas < MAX_EXEC_OUTPUT_DELTAS_PER_CALL { let chunk = tmp[..n].to_vec(); let msg = EventMsg::ExecCommandOutputDelta(ExecCommandOutputDeltaEvent { call_id: stream.call_id.clone(), stream: if is_stderr { ExecOutputStream::Stderr } else { ExecOutputStream::Stdout }, chunk, }); let event = Event { id: stream.sub_id.clone(), msg, }; #[allow(clippy::let_unit_value)] let _ = stream.tx_event.send(event).await; emitted_deltas += 1; } append_capped(&mut buf, &tmp[..n], EXEC_OUTPUT_MAX_BYTES); // Continue reading to EOF to avoid back-pressure } Ok(StreamOutput { text: buf, truncated_after_lines: None, }) } #[cfg(unix)] fn synthetic_exit_status(code: i32) -> ExitStatus { use std::os::unix::process::ExitStatusExt; std::process::ExitStatus::from_raw(code) } #[cfg(windows)] fn synthetic_exit_status(code: i32) -> ExitStatus { use std::os::windows::process::ExitStatusExt; // On Windows the raw status is a u32. Use a direct cast to avoid // panicking on negative i32 values produced by prior narrowing casts. std::process::ExitStatus::from_raw(code as u32) } #[cfg(test)] mod tests { use super::*; use pretty_assertions::assert_eq; use std::time::Duration; use tokio::io::AsyncWriteExt; fn make_exec_output( exit_code: i32, stdout: &str, stderr: &str, aggregated: &str, ) -> ExecToolCallOutput { ExecToolCallOutput { exit_code, stdout: StreamOutput::new(stdout.to_string()), stderr: StreamOutput::new(stderr.to_string()), aggregated_output: StreamOutput::new(aggregated.to_string()), duration: Duration::from_millis(1), timed_out: false, } } #[test] fn sandbox_detection_requires_keywords() { let output = make_exec_output(1, "", "", ""); assert!(!is_likely_sandbox_denied( SandboxType::LinuxSeccomp, &output )); } #[test] fn sandbox_detection_identifies_keyword_in_stderr() { let output = make_exec_output(1, "", "Operation not permitted", ""); assert!(is_likely_sandbox_denied(SandboxType::LinuxSeccomp, &output)); } #[test] fn sandbox_detection_respects_quick_reject_exit_codes() { let output = make_exec_output(127, "", "command not found", ""); assert!(!is_likely_sandbox_denied( SandboxType::LinuxSeccomp, &output )); } #[test] fn sandbox_detection_ignores_non_sandbox_mode() { let output = make_exec_output(1, "", "Operation not permitted", ""); assert!(!is_likely_sandbox_denied(SandboxType::None, &output)); } #[test] fn sandbox_detection_ignores_network_policy_text_in_non_sandbox_mode() { let output = make_exec_output( 0, "", "", r#"CODEX_NETWORK_POLICY_DECISION {"decision":"ask","reason":"not_allowed","source":"decider","protocol":"http","host":"google.com","port":80}"#, ); assert!(!is_likely_sandbox_denied(SandboxType::None, &output)); } #[test] fn sandbox_detection_uses_aggregated_output() { let output = make_exec_output( 101, "", "", "cargo failed: Read-only file system when writing target", ); assert!(is_likely_sandbox_denied( SandboxType::MacosSeatbelt, &output )); } #[test] fn sandbox_detection_ignores_network_policy_text_with_zero_exit_code() { let output = make_exec_output( 0, "", "", r#"CODEX_NETWORK_POLICY_DECISION {"decision":"ask","source":"decider","protocol":"http","host":"google.com","port":80}"#, ); assert!(!is_likely_sandbox_denied( SandboxType::LinuxSeccomp, &output )); } #[tokio::test] async fn read_capped_limits_retained_bytes() { let (mut writer, reader) = tokio::io::duplex(1024); let bytes = vec![b'a'; EXEC_OUTPUT_MAX_BYTES.saturating_add(128 * 1024)]; tokio::spawn(async move { writer.write_all(&bytes).await.expect("write"); }); let out = read_capped(reader, None, false).await.expect("read"); assert_eq!(out.text.len(), EXEC_OUTPUT_MAX_BYTES); } #[test] fn aggregate_output_prefers_stderr_on_contention() { let stdout = StreamOutput { text: vec![b'a'; EXEC_OUTPUT_MAX_BYTES], truncated_after_lines: None, }; let stderr = StreamOutput { text: vec![b'b'; EXEC_OUTPUT_MAX_BYTES], truncated_after_lines: None, }; let aggregated = aggregate_output(&stdout, &stderr); let stdout_cap = EXEC_OUTPUT_MAX_BYTES / 3; let stderr_cap = EXEC_OUTPUT_MAX_BYTES.saturating_sub(stdout_cap); assert_eq!(aggregated.text.len(), EXEC_OUTPUT_MAX_BYTES); assert_eq!(aggregated.text[..stdout_cap], vec![b'a'; stdout_cap]); assert_eq!(aggregated.text[stdout_cap..], vec![b'b'; stderr_cap]); } #[test] fn aggregate_output_fills_remaining_capacity_with_stderr() { let stdout_len = EXEC_OUTPUT_MAX_BYTES / 10; let stdout = StreamOutput { text: vec![b'a'; stdout_len], truncated_after_lines: None, }; let stderr = StreamOutput { text: vec![b'b'; EXEC_OUTPUT_MAX_BYTES], truncated_after_lines: None, }; let aggregated = aggregate_output(&stdout, &stderr); let stderr_cap = EXEC_OUTPUT_MAX_BYTES.saturating_sub(stdout_len); assert_eq!(aggregated.text.len(), EXEC_OUTPUT_MAX_BYTES); assert_eq!(aggregated.text[..stdout_len], vec![b'a'; stdout_len]); assert_eq!(aggregated.text[stdout_len..], vec![b'b'; stderr_cap]); } #[test] fn aggregate_output_rebalances_when_stderr_is_small() { let stdout = StreamOutput { text: vec![b'a'; EXEC_OUTPUT_MAX_BYTES], truncated_after_lines: None, }; let stderr = StreamOutput { text: vec![b'b'; 1], truncated_after_lines: None, }; let aggregated = aggregate_output(&stdout, &stderr); let stdout_len = EXEC_OUTPUT_MAX_BYTES.saturating_sub(1); assert_eq!(aggregated.text.len(), EXEC_OUTPUT_MAX_BYTES); assert_eq!(aggregated.text[..stdout_len], vec![b'a'; stdout_len]); assert_eq!(aggregated.text[stdout_len..], vec![b'b'; 1]); } #[test] fn aggregate_output_keeps_stdout_then_stderr_when_under_cap() { let stdout = StreamOutput { text: vec![b'a'; 4], truncated_after_lines: None, }; let stderr = StreamOutput { text: vec![b'b'; 3], truncated_after_lines: None, }; let aggregated = aggregate_output(&stdout, &stderr); let mut expected = Vec::new(); expected.extend_from_slice(&stdout.text); expected.extend_from_slice(&stderr.text); assert_eq!(aggregated.text, expected); assert_eq!(aggregated.truncated_after_lines, None); } #[cfg(unix)] #[test] fn sandbox_detection_flags_sigsys_exit_code() { let exit_code = EXIT_CODE_SIGNAL_BASE + libc::SIGSYS; let output = make_exec_output(exit_code, "", "", ""); assert!(is_likely_sandbox_denied(SandboxType::LinuxSeccomp, &output)); } #[cfg(unix)] #[tokio::test] async fn kill_child_process_group_kills_grandchildren_on_timeout() -> Result<()> { // On Linux/macOS, /bin/bash is typically present; on FreeBSD/OpenBSD, // prefer /bin/sh to avoid NotFound errors. #[cfg(any(target_os = "freebsd", target_os = "openbsd"))] let command = vec![ "/bin/sh".to_string(), "-c".to_string(), "sleep 60 & echo $!; sleep 60".to_string(), ]; #[cfg(all(unix, not(any(target_os = "freebsd", target_os = "openbsd"))))] let command = vec![ "/bin/bash".to_string(), "-c".to_string(), "sleep 60 & echo $!; sleep 60".to_string(), ]; let env: HashMap = std::env::vars().collect(); let params = ExecParams { command, cwd: std::env::current_dir()?, expiration: 500.into(), env, network: None, sandbox_permissions: SandboxPermissions::UseDefault, windows_sandbox_level: codex_protocol::config_types::WindowsSandboxLevel::Disabled, justification: None, arg0: None, }; let output = exec( params, SandboxType::None, &SandboxPolicy::new_read_only_policy(), None, ) .await?; assert!(output.timed_out); let stdout = output.stdout.from_utf8_lossy().text; let pid_line = stdout.lines().next().unwrap_or("").trim(); let pid: i32 = pid_line.parse().map_err(|error| { io::Error::new( io::ErrorKind::InvalidData, format!("Failed to parse pid from stdout '{pid_line}': {error}"), ) })?; let mut killed = false; for _ in 0..20 { // Use kill(pid, 0) to check if the process is alive. if unsafe { libc::kill(pid, 0) } == -1 && let Some(libc::ESRCH) = std::io::Error::last_os_error().raw_os_error() { killed = true; break; } tokio::time::sleep(Duration::from_millis(100)).await; } assert!(killed, "grandchild process with pid {pid} is still alive"); Ok(()) } #[tokio::test] async fn process_exec_tool_call_respects_cancellation_token() -> Result<()> { let command = long_running_command(); let cwd = std::env::current_dir()?; let env: HashMap = std::env::vars().collect(); let cancel_token = CancellationToken::new(); let cancel_tx = cancel_token.clone(); let params = ExecParams { command, cwd: cwd.clone(), expiration: ExecExpiration::Cancellation(cancel_token), env, network: None, sandbox_permissions: SandboxPermissions::UseDefault, windows_sandbox_level: codex_protocol::config_types::WindowsSandboxLevel::Disabled, justification: None, arg0: None, }; tokio::spawn(async move { tokio::time::sleep(Duration::from_millis(1_000)).await; cancel_tx.cancel(); }); let result = process_exec_tool_call( params, &SandboxPolicy::DangerFullAccess, cwd.as_path(), &None, false, None, ) .await; let output = match result { Err(CodexErr::Sandbox(SandboxErr::Timeout { output })) => output, other => panic!("expected timeout error, got {other:?}"), }; assert!(output.timed_out); assert_eq!(output.exit_code, EXEC_TIMEOUT_EXIT_CODE); Ok(()) } #[cfg(unix)] fn long_running_command() -> Vec { vec![ "/bin/sh".to_string(), "-c".to_string(), "sleep 30".to_string(), ] } #[cfg(windows)] fn long_running_command() -> Vec { vec![ "powershell.exe".to_string(), "-NonInteractive".to_string(), "-NoLogo".to_string(), "-Command".to_string(), "Start-Sleep -Seconds 30".to_string(), ] } }