Compare commits

...

3 Commits

Author SHA1 Message Date
Ahmed Ibrahim
7fbae94b61 Release 0.57.0 2025-11-09 14:07:45 -08:00
Luca King
a2fdfce02a Kill shell tool process groups on timeout (#5258)
## Summary
- launch shell tool processes in their own process group so Codex owns
the full tree
- on timeout or ctrl-c, send SIGKILL to the process group before
terminating the tracked child
- document that the default shell/unified_exec timeout remains 1000 ms

## Original Bug
Long-lived shell tool commands hang indefinitely because the timeout
handler only terminated the direct child process; any grandchildren it
spawned kept running and held the PTY open, preventing Codex from
regaining control.

## Repro Original Bug
Install next.js and run `next dev` (which is a long-running shell
process with children). On openai:main, it will cause the agent to
permanently get stuck here until human intervention. On this branch,
this command will be terminated successfully after timeout_ms which will
unblock the agent. This is a critical fix for unmonitored / lightly
monitored agents that don't have immediate human observation to unblock
them.

---------

Co-authored-by: Michael Bolin <mbolin@openai.com>
Co-authored-by: Michael Bolin <bolinfest@gmail.com>
2025-11-07 17:54:35 -08:00
pakrym-oai
91b16b8682 Don't request approval for safe commands in unified exec (#6380) 2025-11-07 16:36:04 -08:00
7 changed files with 283 additions and 75 deletions

View File

@@ -43,7 +43,7 @@ members = [
resolver = "2"
[workspace.package]
version = "0.0.0"
version = "0.57.0"
# Track the edition for all workspace crates in one place. Individual
# crates can still override this value, but keeping it here means new
# crates created with `cargo new -w ...` automatically inherit the 2024

View File

@@ -1,4 +1,38 @@
use codex_protocol::protocol::AskForApproval;
use codex_protocol::protocol::SandboxPolicy;
use crate::bash::parse_shell_lc_plain_commands;
use crate::is_safe_command::is_known_safe_command;
pub fn requires_initial_appoval(
policy: AskForApproval,
sandbox_policy: &SandboxPolicy,
command: &[String],
with_escalated_permissions: bool,
) -> bool {
if is_known_safe_command(command) {
return false;
}
match policy {
AskForApproval::Never | AskForApproval::OnFailure => false,
AskForApproval::OnRequest => {
// In DangerFullAccess, only prompt if the command looks dangerous.
if matches!(sandbox_policy, SandboxPolicy::DangerFullAccess) {
return command_might_be_dangerous(command);
}
// In restricted sandboxes (ReadOnly/WorkspaceWrite), do not prompt for
// nonescalated, nondangerous commands — let the sandbox enforce
// restrictions (e.g., block network/write) without a user prompt.
let wants_escalation: bool = with_escalated_permissions;
if wants_escalation {
return true;
}
command_might_be_dangerous(command)
}
AskForApproval::UnlessTrusted => !is_known_safe_command(command),
}
}
pub fn command_might_be_dangerous(command: &[String]) -> bool {
if is_dangerous_to_call_with_exec(command) {

View File

@@ -518,6 +518,7 @@ async fn consume_truncated_output(
}
Err(_) => {
// timeout
kill_child_process_group(&mut child)?;
child.start_kill()?;
// Debatable whether `child.wait().await` should be called here.
(synthetic_exit_status(EXIT_CODE_SIGNAL_BASE + TIMEOUT_CODE), true)
@@ -525,6 +526,7 @@ async fn consume_truncated_output(
}
}
_ = tokio::signal::ctrl_c() => {
kill_child_process_group(&mut child)?;
child.start_kill()?;
(synthetic_exit_status(EXIT_CODE_SIGNAL_BASE + SIGKILL_CODE), false)
}
@@ -621,6 +623,38 @@ fn synthetic_exit_status(code: i32) -> ExitStatus {
std::process::ExitStatus::from_raw(code as u32)
}
#[cfg(unix)]
fn kill_child_process_group(child: &mut Child) -> io::Result<()> {
use std::io::ErrorKind;
if let Some(pid) = child.id() {
let pid = pid as libc::pid_t;
let pgid = unsafe { libc::getpgid(pid) };
if pgid == -1 {
let err = std::io::Error::last_os_error();
if err.kind() != ErrorKind::NotFound {
return Err(err);
}
return Ok(());
}
let result = unsafe { libc::killpg(pgid, libc::SIGKILL) };
if result == -1 {
let err = std::io::Error::last_os_error();
if err.kind() != ErrorKind::NotFound {
return Err(err);
}
}
}
Ok(())
}
#[cfg(not(unix))]
fn kill_child_process_group(_: &mut Child) -> io::Result<()> {
Ok(())
}
#[cfg(test)]
mod tests {
use super::*;
@@ -693,4 +727,51 @@ mod tests {
let output = make_exec_output(exit_code, "", "", "");
assert!(is_likely_sandbox_denied(SandboxType::LinuxSeccomp, &output));
}
#[cfg(unix)]
#[tokio::test]
async fn kill_child_process_group_kills_grandchildren_on_timeout() -> Result<()> {
let command = vec![
"/bin/bash".to_string(),
"-c".to_string(),
"sleep 60 & echo $!; sleep 60".to_string(),
];
let env: HashMap<String, String> = std::env::vars().collect();
let params = ExecParams {
command,
cwd: std::env::current_dir()?,
timeout_ms: Some(500),
env,
with_escalated_permissions: None,
justification: None,
arg0: None,
};
let output = exec(params, SandboxType::None, &SandboxPolicy::ReadOnly, None).await?;
assert!(output.timed_out);
let stdout = output.stdout.from_utf8_lossy().text;
let pid_line = stdout.lines().next().unwrap_or("").trim();
let pid: i32 = pid_line.parse().map_err(|error| {
io::Error::new(
io::ErrorKind::InvalidData,
format!("Failed to parse pid from stdout '{pid_line}': {error}"),
)
})?;
let mut killed = false;
for _ in 0..20 {
// Use kill(pid, 0) to check if the process is alive.
if unsafe { libc::kill(pid, 0) } == -1
&& let Some(libc::ESRCH) = std::io::Error::last_os_error().raw_os_error()
{
killed = true;
break;
}
tokio::time::sleep(Duration::from_millis(100)).await;
}
assert!(killed, "grandchild process with pid {pid} is still alive");
Ok(())
}
}

View File

@@ -64,24 +64,32 @@ pub(crate) async fn spawn_child_async(
// any child processes that were spawned as part of a `"shell"` tool call
// to also be terminated.
// This relies on prctl(2), so it only works on Linux.
#[cfg(target_os = "linux")]
#[cfg(unix)]
unsafe {
#[cfg(target_os = "linux")]
let parent_pid = libc::getpid();
cmd.pre_exec(move || {
// This prctl call effectively requests, "deliver SIGTERM when my
// current parent dies."
if libc::prctl(libc::PR_SET_PDEATHSIG, libc::SIGTERM) == -1 {
if libc::setpgid(0, 0) == -1 {
return Err(std::io::Error::last_os_error());
}
// Though if there was a race condition and this pre_exec() block is
// run _after_ the parent (i.e., the Codex process) has already
// exited, then parent will be the closest configured "subreaper"
// ancestor process, or PID 1 (init). If the Codex process has exited
// already, so should the child process.
if libc::getppid() != parent_pid {
libc::raise(libc::SIGTERM);
// This relies on prctl(2), so it only works on Linux.
#[cfg(target_os = "linux")]
{
// This prctl call effectively requests, "deliver SIGTERM when my
// current parent dies."
if libc::prctl(libc::PR_SET_PDEATHSIG, libc::SIGTERM) == -1 {
return Err(std::io::Error::last_os_error());
}
// Though if there was a race condition and this pre_exec() block is
// run _after_ the parent (i.e., the Codex process) has already
// exited, then parent will be the closest configured "subreaper"
// ancestor process, or PID 1 (init). If the Codex process has exited
// already, so should the child process.
if libc::getppid() != parent_pid {
libc::raise(libc::SIGTERM);
}
}
Ok(())
});

View File

@@ -4,8 +4,7 @@ Runtime: shell
Executes shell requests under the orchestrator: asks for approval when needed,
builds a CommandSpec, and runs it under the current SandboxAttempt.
*/
use crate::command_safety::is_dangerous_command::command_might_be_dangerous;
use crate::command_safety::is_safe_command::is_known_safe_command;
use crate::command_safety::is_dangerous_command::requires_initial_appoval;
use crate::exec::ExecToolCallOutput;
use crate::protocol::SandboxPolicy;
use crate::sandboxing::execute_env;
@@ -121,28 +120,12 @@ impl Approvable<ShellRequest> for ShellRuntime {
policy: AskForApproval,
sandbox_policy: &SandboxPolicy,
) -> bool {
if is_known_safe_command(&req.command) {
return false;
}
match policy {
AskForApproval::Never | AskForApproval::OnFailure => false,
AskForApproval::OnRequest => {
// In DangerFullAccess, only prompt if the command looks dangerous.
if matches!(sandbox_policy, SandboxPolicy::DangerFullAccess) {
return command_might_be_dangerous(&req.command);
}
// In restricted sandboxes (ReadOnly/WorkspaceWrite), do not prompt for
// nonescalated, nondangerous commands — let the sandbox enforce
// restrictions (e.g., block network/write) without a user prompt.
let wants_escalation = req.with_escalated_permissions.unwrap_or(false);
if wants_escalation {
return true;
}
command_might_be_dangerous(&req.command)
}
AskForApproval::UnlessTrusted => !is_known_safe_command(&req.command),
}
requires_initial_appoval(
policy,
sandbox_policy,
&req.command,
req.with_escalated_permissions.unwrap_or(false),
)
}
fn wants_escalated_first_attempt(&self, req: &ShellRequest) -> bool {

View File

@@ -1,3 +1,4 @@
use crate::command_safety::is_dangerous_command::requires_initial_appoval;
/*
Runtime: unified exec
@@ -21,7 +22,9 @@ use crate::tools::sandboxing::with_cached_approval;
use crate::unified_exec::UnifiedExecError;
use crate::unified_exec::UnifiedExecSession;
use crate::unified_exec::UnifiedExecSessionManager;
use codex_protocol::protocol::AskForApproval;
use codex_protocol::protocol::ReviewDecision;
use codex_protocol::protocol::SandboxPolicy;
use futures::future::BoxFuture;
use std::collections::HashMap;
use std::path::PathBuf;
@@ -106,6 +109,15 @@ impl Approvable<UnifiedExecRequest> for UnifiedExecRuntime<'_> {
.await
})
}
fn wants_initial_approval(
&self,
req: &UnifiedExecRequest,
policy: AskForApproval,
sandbox_policy: &SandboxPolicy,
) -> bool {
requires_initial_appoval(policy, sandbox_policy, &req.command, false)
}
}
impl<'a> ToolRuntime<UnifiedExecRequest, UnifiedExecSession> for UnifiedExecRuntime<'a> {

View File

@@ -1,6 +1,7 @@
#![allow(clippy::unwrap_used, clippy::expect_used)]
use anyhow::Result;
use codex_core::features::Feature;
use codex_core::model_family::find_family_for_model;
use codex_core::protocol::ApplyPatchApprovalRequestEvent;
use codex_core::protocol::AskForApproval;
@@ -24,6 +25,7 @@ use core_test_support::test_codex::TestCodex;
use core_test_support::test_codex::test_codex;
use core_test_support::wait_for_event;
use pretty_assertions::assert_eq;
use regex_lite::Regex;
use serde_json::Value;
use serde_json::json;
use std::env;
@@ -71,6 +73,9 @@ enum ActionKind {
RunCommand {
command: &'static [&'static str],
},
RunUnifiedExecCommand {
command: &'static str,
},
ApplyPatchFunction {
target: TargetPath,
content: &'static str,
@@ -134,6 +139,17 @@ impl ActionKind {
let event = shell_event(call_id, &command, 1_000, with_escalated_permissions)?;
Ok((event, Some(command)))
}
ActionKind::RunUnifiedExecCommand { command } => {
let event = exec_command_event(call_id, command, Some(1000))?;
Ok((
event,
Some(vec![
"/bin/bash".to_string(),
"-lc".to_string(),
command.to_string(),
]),
))
}
ActionKind::ApplyPatchFunction { target, content } => {
let (path, patch_path) = target.resolve_for_patch(test);
let _ = fs::remove_file(&path);
@@ -183,6 +199,17 @@ fn shell_event(
Ok(ev_function_call(call_id, "shell", &args_str))
}
fn exec_command_event(call_id: &str, cmd: &str, yield_time_ms: Option<u64>) -> Result<Value> {
let mut args = json!({
"cmd": cmd.to_string(),
});
if let Some(yield_time_ms) = yield_time_ms {
args["yield_time_ms"] = json!(yield_time_ms);
}
let args_str = serde_json::to_string(&args)?;
Ok(ev_function_call(call_id, "exec_command", &args_str))
}
#[derive(Clone)]
enum Expectation {
FileCreated {
@@ -206,6 +233,9 @@ enum Expectation {
CommandSuccess {
stdout_contains: &'static str,
},
CommandFailure {
output_contains: &'static str,
},
}
impl Expectation {
@@ -337,6 +367,19 @@ impl Expectation {
result.stdout
);
}
Expectation::CommandFailure { output_contains } => {
assert_ne!(
result.exit_code,
Some(0),
"expected non-zero exit for command failure: {}",
result.stdout
);
assert!(
result.stdout.contains(output_contains),
"command failure stderr missing {output_contains:?}: {}",
result.stdout
);
}
}
Ok(())
}
@@ -362,7 +405,7 @@ struct ScenarioSpec {
sandbox_policy: SandboxPolicy,
action: ActionKind,
with_escalated_permissions: bool,
requires_apply_patch_tool: bool,
features: Vec<Feature>,
model_override: Option<&'static str>,
outcome: Outcome,
expectation: Expectation,
@@ -410,10 +453,24 @@ fn parse_result(item: &Value) -> CommandResult {
let stdout = parsed["output"].as_str().unwrap_or_default().to_string();
CommandResult { exit_code, stdout }
}
Err(_) => CommandResult {
exit_code: None,
stdout: output_str.to_string(),
},
Err(_) => {
let regex =
Regex::new(r"(?s)^.*?Process exited with code (\d+)\n.*?Output:\n(.*)$").unwrap();
// parse freeform output
if let Some(captures) = regex.captures(output_str) {
let exit_code = captures.get(1).unwrap().as_str().parse::<i64>().unwrap();
let output = captures.get(2).unwrap().as_str();
CommandResult {
exit_code: Some(exit_code),
stdout: output.to_string(),
}
} else {
CommandResult {
exit_code: None,
stdout: output_str.to_string(),
}
}
}
}
}
@@ -506,7 +563,7 @@ fn scenarios() -> Vec<ScenarioSpec> {
content: "danger-on-request",
},
with_escalated_permissions: false,
requires_apply_patch_tool: false,
features: vec![],
model_override: None,
outcome: Outcome::Auto,
expectation: Expectation::FileCreated {
@@ -523,7 +580,7 @@ fn scenarios() -> Vec<ScenarioSpec> {
response_body: "danger-network-ok",
},
with_escalated_permissions: false,
requires_apply_patch_tool: false,
features: vec![],
model_override: None,
outcome: Outcome::Auto,
expectation: Expectation::NetworkSuccess {
@@ -538,7 +595,7 @@ fn scenarios() -> Vec<ScenarioSpec> {
command: &["echo", "trusted-unless"],
},
with_escalated_permissions: false,
requires_apply_patch_tool: false,
features: vec![],
model_override: None,
outcome: Outcome::Auto,
expectation: Expectation::CommandSuccess {
@@ -554,7 +611,7 @@ fn scenarios() -> Vec<ScenarioSpec> {
content: "danger-on-failure",
},
with_escalated_permissions: false,
requires_apply_patch_tool: false,
features: vec![],
model_override: None,
outcome: Outcome::Auto,
expectation: Expectation::FileCreated {
@@ -571,7 +628,7 @@ fn scenarios() -> Vec<ScenarioSpec> {
content: "danger-unless-trusted",
},
with_escalated_permissions: false,
requires_apply_patch_tool: false,
features: vec![],
model_override: None,
outcome: Outcome::ExecApproval {
decision: ReviewDecision::Approved,
@@ -591,7 +648,7 @@ fn scenarios() -> Vec<ScenarioSpec> {
content: "danger-never",
},
with_escalated_permissions: false,
requires_apply_patch_tool: false,
features: vec![],
model_override: None,
outcome: Outcome::Auto,
expectation: Expectation::FileCreated {
@@ -608,7 +665,7 @@ fn scenarios() -> Vec<ScenarioSpec> {
content: "read-only-approval",
},
with_escalated_permissions: true,
requires_apply_patch_tool: false,
features: vec![],
model_override: None,
outcome: Outcome::ExecApproval {
decision: ReviewDecision::Approved,
@@ -627,7 +684,7 @@ fn scenarios() -> Vec<ScenarioSpec> {
command: &["echo", "trusted-read-only"],
},
with_escalated_permissions: false,
requires_apply_patch_tool: false,
features: vec![],
model_override: None,
outcome: Outcome::Auto,
expectation: Expectation::CommandSuccess {
@@ -643,7 +700,7 @@ fn scenarios() -> Vec<ScenarioSpec> {
response_body: "should-not-see",
},
with_escalated_permissions: false,
requires_apply_patch_tool: false,
features: vec![],
model_override: None,
outcome: Outcome::Auto,
expectation: Expectation::NetworkFailure { expect_tag: "ERR:" },
@@ -657,7 +714,7 @@ fn scenarios() -> Vec<ScenarioSpec> {
content: "should-not-write",
},
with_escalated_permissions: true,
requires_apply_patch_tool: false,
features: vec![],
model_override: None,
outcome: Outcome::ExecApproval {
decision: ReviewDecision::Denied,
@@ -678,7 +735,7 @@ fn scenarios() -> Vec<ScenarioSpec> {
content: "read-only-on-failure",
},
with_escalated_permissions: false,
requires_apply_patch_tool: false,
features: vec![],
model_override: None,
outcome: Outcome::ExecApproval {
decision: ReviewDecision::Approved,
@@ -698,7 +755,7 @@ fn scenarios() -> Vec<ScenarioSpec> {
response_body: "read-only-network-ok",
},
with_escalated_permissions: true,
requires_apply_patch_tool: false,
features: vec![],
model_override: None,
outcome: Outcome::ExecApproval {
decision: ReviewDecision::Approved,
@@ -717,7 +774,7 @@ fn scenarios() -> Vec<ScenarioSpec> {
content: "shell-apply-patch",
},
with_escalated_permissions: false,
requires_apply_patch_tool: true,
features: vec![],
model_override: None,
outcome: Outcome::PatchApproval {
decision: ReviewDecision::Approved,
@@ -737,7 +794,7 @@ fn scenarios() -> Vec<ScenarioSpec> {
content: "function-apply-patch",
},
with_escalated_permissions: false,
requires_apply_patch_tool: true,
features: vec![],
model_override: Some("gpt-5-codex"),
outcome: Outcome::Auto,
expectation: Expectation::PatchApplied {
@@ -754,7 +811,7 @@ fn scenarios() -> Vec<ScenarioSpec> {
content: "function-patch-danger",
},
with_escalated_permissions: false,
requires_apply_patch_tool: true,
features: vec![Feature::ApplyPatchFreeform],
model_override: Some("gpt-5-codex"),
outcome: Outcome::Auto,
expectation: Expectation::PatchApplied {
@@ -771,7 +828,7 @@ fn scenarios() -> Vec<ScenarioSpec> {
content: "function-patch-outside",
},
with_escalated_permissions: false,
requires_apply_patch_tool: true,
features: vec![],
model_override: Some("gpt-5-codex"),
outcome: Outcome::PatchApproval {
decision: ReviewDecision::Approved,
@@ -791,7 +848,7 @@ fn scenarios() -> Vec<ScenarioSpec> {
content: "function-patch-outside-denied",
},
with_escalated_permissions: false,
requires_apply_patch_tool: true,
features: vec![],
model_override: Some("gpt-5-codex"),
outcome: Outcome::PatchApproval {
decision: ReviewDecision::Denied,
@@ -811,7 +868,7 @@ fn scenarios() -> Vec<ScenarioSpec> {
content: "shell-patch-outside",
},
with_escalated_permissions: false,
requires_apply_patch_tool: true,
features: vec![],
model_override: None,
outcome: Outcome::PatchApproval {
decision: ReviewDecision::Approved,
@@ -831,7 +888,7 @@ fn scenarios() -> Vec<ScenarioSpec> {
content: "function-patch-unless-trusted",
},
with_escalated_permissions: false,
requires_apply_patch_tool: true,
features: vec![],
model_override: Some("gpt-5-codex"),
outcome: Outcome::PatchApproval {
decision: ReviewDecision::Approved,
@@ -851,7 +908,7 @@ fn scenarios() -> Vec<ScenarioSpec> {
content: "function-patch-never",
},
with_escalated_permissions: false,
requires_apply_patch_tool: true,
features: vec![],
model_override: Some("gpt-5-codex"),
outcome: Outcome::Auto,
expectation: Expectation::FileNotCreated {
@@ -870,7 +927,7 @@ fn scenarios() -> Vec<ScenarioSpec> {
content: "read-only-unless-trusted",
},
with_escalated_permissions: false,
requires_apply_patch_tool: false,
features: vec![],
model_override: None,
outcome: Outcome::ExecApproval {
decision: ReviewDecision::Approved,
@@ -890,7 +947,7 @@ fn scenarios() -> Vec<ScenarioSpec> {
content: "read-only-never",
},
with_escalated_permissions: false,
requires_apply_patch_tool: false,
features: vec![],
model_override: None,
outcome: Outcome::Auto,
expectation: Expectation::FileNotCreated {
@@ -910,7 +967,7 @@ fn scenarios() -> Vec<ScenarioSpec> {
command: &["echo", "trusted-never"],
},
with_escalated_permissions: false,
requires_apply_patch_tool: false,
features: vec![],
model_override: None,
outcome: Outcome::Auto,
expectation: Expectation::CommandSuccess {
@@ -926,7 +983,7 @@ fn scenarios() -> Vec<ScenarioSpec> {
content: "workspace-on-request",
},
with_escalated_permissions: false,
requires_apply_patch_tool: false,
features: vec![],
model_override: None,
outcome: Outcome::Auto,
expectation: Expectation::FileCreated {
@@ -943,7 +1000,7 @@ fn scenarios() -> Vec<ScenarioSpec> {
response_body: "workspace-network-blocked",
},
with_escalated_permissions: false,
requires_apply_patch_tool: false,
features: vec![],
model_override: None,
outcome: Outcome::Auto,
expectation: Expectation::NetworkFailure { expect_tag: "ERR:" },
@@ -957,7 +1014,7 @@ fn scenarios() -> Vec<ScenarioSpec> {
content: "workspace-on-request-outside",
},
with_escalated_permissions: true,
requires_apply_patch_tool: false,
features: vec![],
model_override: None,
outcome: Outcome::ExecApproval {
decision: ReviewDecision::Approved,
@@ -977,7 +1034,7 @@ fn scenarios() -> Vec<ScenarioSpec> {
response_body: "workspace-network-ok",
},
with_escalated_permissions: false,
requires_apply_patch_tool: false,
features: vec![],
model_override: None,
outcome: Outcome::Auto,
expectation: Expectation::NetworkSuccess {
@@ -994,7 +1051,7 @@ fn scenarios() -> Vec<ScenarioSpec> {
content: "workspace-on-failure",
},
with_escalated_permissions: false,
requires_apply_patch_tool: false,
features: vec![],
model_override: None,
outcome: Outcome::ExecApproval {
decision: ReviewDecision::Approved,
@@ -1014,7 +1071,7 @@ fn scenarios() -> Vec<ScenarioSpec> {
content: "workspace-unless-trusted",
},
with_escalated_permissions: false,
requires_apply_patch_tool: false,
features: vec![],
model_override: None,
outcome: Outcome::ExecApproval {
decision: ReviewDecision::Approved,
@@ -1034,7 +1091,7 @@ fn scenarios() -> Vec<ScenarioSpec> {
content: "workspace-never",
},
with_escalated_permissions: false,
requires_apply_patch_tool: false,
features: vec![],
model_override: None,
outcome: Outcome::Auto,
expectation: Expectation::FileNotCreated {
@@ -1046,6 +1103,39 @@ fn scenarios() -> Vec<ScenarioSpec> {
},
},
},
ScenarioSpec {
name: "unified exec on request no approval for safe command",
approval_policy: OnRequest,
sandbox_policy: SandboxPolicy::DangerFullAccess,
action: ActionKind::RunUnifiedExecCommand {
command: "echo \"hello unified exec\"",
},
with_escalated_permissions: false,
features: vec![Feature::UnifiedExec],
model_override: None,
outcome: Outcome::Auto,
expectation: Expectation::CommandSuccess {
stdout_contains: "hello unified exec",
},
},
ScenarioSpec {
name: "unified exec on request requires approval unless trusted",
approval_policy: AskForApproval::UnlessTrusted,
sandbox_policy: SandboxPolicy::DangerFullAccess,
action: ActionKind::RunUnifiedExecCommand {
command: "git reset --hard",
},
with_escalated_permissions: false,
features: vec![Feature::UnifiedExec],
model_override: None,
outcome: Outcome::ExecApproval {
decision: ReviewDecision::Denied,
expected_reason: None,
},
expectation: Expectation::CommandFailure {
output_contains: "rejected by user",
},
},
]
}
@@ -1065,7 +1155,7 @@ async fn run_scenario(scenario: &ScenarioSpec) -> Result<()> {
let server = start_mock_server().await;
let approval_policy = scenario.approval_policy;
let sandbox_policy = scenario.sandbox_policy.clone();
let requires_apply_patch_tool = scenario.requires_apply_patch_tool;
let features = scenario.features.clone();
let model_override = scenario.model_override;
let mut builder = test_codex().with_config(move |config| {
@@ -1075,8 +1165,8 @@ async fn run_scenario(scenario: &ScenarioSpec) -> Result<()> {
config.model = model.to_string();
config.model_family =
find_family_for_model(model).expect("model should map to a known family");
if requires_apply_patch_tool {
config.include_apply_patch_tool = true;
for feature in features {
config.features.enable(feature);
}
});
let test = builder.build(&server).await?;