feat: include sandbox config with escalation request (#12839)

## Why Before this change, an escalation approval could say that a command should be rerun, but it could not carry the sandbox configuration that should still apply when the escalated command is actually spawned. That left an unsafe gap in the `zsh-fork` skill path: skill scripts under `scripts/` that did not declare permissions could be escalated without a sandbox, and scripts that did declare permissions could lose their bounded sandbox on rerun or cached session approval. This PR extends the escalation protocol so approvals can optionally carry sandbox configuration all the way through execution. That lets the shell runtime preserve the intended sandbox instead of silently widening access. We likely want a single permissions type for this codepath eventually, probably centered on `Permissions`. For now, the protocol needs to represent both the existing `PermissionProfile` form and the fuller `Permissions` form, so this introduces a temporary disjoint union, `EscalationPermissions`, to carry either one. Further, this means that today, a skill either: - does not declare any permissions, in which case it is run using the default sandbox for the turn - specifies permissions, in which case the skill is run using that exact sandbox, which might be more restrictive than the default sandbox for the turn We will likely change the skill's permissions to be additive to the existing permissions for the turn. ## What Changed - Added `EscalationPermissions` to `codex-protocol` so escalation requests can carry either a `PermissionProfile` or a full `Permissions` payload. - Added an explicit `EscalationExecution` mode to the shell escalation protocol so reruns distinguish between `Unsandboxed`, `TurnDefault`, and `Permissions(...)` instead of overloading `None`. - Updated `zsh-fork` shell reruns to resolve `TurnDefault` at execution time, which keeps ordinary `UseDefault` commands on the turn sandbox and preserves turn-level macOS seatbelt profile extensions. - Updated the `zsh-fork` skill path so a skill with no declared permissions inherits the conversation's effective sandbox instead of escalating unsandboxed. - Updated the `zsh-fork` skill path so a skill with declared permissions reruns with exactly those permissions, including when a cached session approval is reused. ## Testing - Added unit coverage in `core/src/tools/runtimes/shell/unix_escalation.rs` for the explicit `UseDefault` / `RequireEscalated` / `WithAdditionalPermissions` execution mapping. - Added unit coverage in `core/src/tools/runtimes/shell/unix_escalation.rs` for macOS seatbelt extension preservation in both the `TurnDefault` and explicit-permissions rerun paths. - Added integration coverage in `core/tests/suite/skill_approval.rs` for permissionless skills inheriting the turn sandbox and explicit skill permissions remaining bounded across cached approval reuse.
2026-04-30 11:21:34 +03:00 · 2026-02-26 12:00:18 -08:00
parent 6b879fe248
commit 7fa9d9ae35
19 changed files with 1229 additions and 202 deletions
--- a/codex-rs/shell-escalation/src/lib.rs
+++ b/codex-rs/shell-escalation/src/lib.rs
@@ -6,12 +6,22 @@ pub use unix::EscalateAction;
 #[cfg(unix)]
 pub use unix::EscalateServer;
 #[cfg(unix)]
+pub use unix::EscalationDecision;
+#[cfg(unix)]
+pub use unix::EscalationExecution;
+#[cfg(unix)]
+pub use unix::EscalationPermissions;
+#[cfg(unix)]
 pub use unix::EscalationPolicy;
 #[cfg(unix)]
 pub use unix::ExecParams;
 #[cfg(unix)]
 pub use unix::ExecResult;
 #[cfg(unix)]
+pub use unix::Permissions;
+#[cfg(unix)]
+pub use unix::PreparedExec;
+#[cfg(unix)]
 pub use unix::ShellCommandExecutor;
 #[cfg(unix)]
 pub use unix::Stopwatch;
--- a/codex-rs/shell-escalation/src/unix/escalate_protocol.rs
+++ b/codex-rs/shell-escalation/src/unix/escalate_protocol.rs
@@ -2,6 +2,7 @@ use std::collections::HashMap;
 use std::os::fd::RawFd;
 use std::path::PathBuf;

+use codex_protocol::approvals::EscalationPermissions;
 use codex_utils_absolute_path::AbsolutePathBuf;
 use serde::Deserialize;
 use serde::Serialize;
@@ -35,6 +36,38 @@ pub struct EscalateResponse {
    pub action: EscalateAction,
 }

+#[derive(Clone, Debug, PartialEq, Eq)]
+pub enum EscalationDecision {
+    Run,
+    Escalate(EscalationExecution),
+    Deny { reason: Option<String> },
+}
+
+#[allow(clippy::large_enum_variant)]
+#[derive(Clone, Debug, PartialEq, Eq)]
+pub enum EscalationExecution {
+    /// Rerun the intercepted command outside any sandbox wrapper.
+    Unsandboxed,
+    /// Rerun using the turn's current sandbox configuration.
+    TurnDefault,
+    /// Rerun using an explicit sandbox configuration attached to the request.
+    Permissions(EscalationPermissions),
+}
+
+impl EscalationDecision {
+    pub fn run() -> Self {
+        Self::Run
+    }
+
+    pub fn escalate(execution: EscalationExecution) -> Self {
+        Self::Escalate(execution)
+    }
+
+    pub fn deny(reason: Option<String>) -> Self {
+        Self::Deny { reason }
+    }
+}
+
 #[derive(Clone, Serialize, Deserialize, Debug, PartialEq, Eq)]
 pub enum EscalateAction {
    /// The command should be run directly by the client.
--- a/codex-rs/shell-escalation/src/unix/escalate_server.rs
+++ b/codex-rs/shell-escalation/src/unix/escalate_server.rs
@@ -15,6 +15,8 @@ use crate::unix::escalate_protocol::EXEC_WRAPPER_ENV_VAR;
 use crate::unix::escalate_protocol::EscalateAction;
 use crate::unix::escalate_protocol::EscalateRequest;
 use crate::unix::escalate_protocol::EscalateResponse;
+use crate::unix::escalate_protocol::EscalationDecision;
+use crate::unix::escalate_protocol::EscalationExecution;
 use crate::unix::escalate_protocol::LEGACY_BASH_EXEC_WRAPPER_ENV_VAR;
 use crate::unix::escalate_protocol::SuperExecMessage;
 use crate::unix::escalate_protocol::SuperExecResult;
@@ -37,6 +39,16 @@ pub trait ShellCommandExecutor: Send + Sync {
        env: HashMap<String, String>,
        cancel_rx: CancellationToken,
    ) -> anyhow::Result<ExecResult>;
+
+    /// Prepares an escalated subcommand for execution on the server side.
+    async fn prepare_escalated_exec(
+        &self,
+        program: &AbsolutePathBuf,
+        argv: &[String],
+        workdir: &AbsolutePathBuf,
+        env: HashMap<String, String>,
+        execution: EscalationExecution,
+    ) -> anyhow::Result<PreparedExec>;
 }

 #[derive(Debug, serde::Deserialize, serde::Serialize)]
@@ -62,6 +74,14 @@ pub struct ExecResult {
    pub timed_out: bool,
 }

+#[derive(Debug, Clone, PartialEq, Eq)]
+pub struct PreparedExec {
+    pub command: Vec<String>,
+    pub cwd: PathBuf,
+    pub env: HashMap<String, String>,
+    pub arg0: Option<String>,
+}
+
 pub struct EscalateServer {
    bash_path: PathBuf,
    execve_wrapper: PathBuf,
@@ -69,9 +89,9 @@ pub struct EscalateServer {
 }

 impl EscalateServer {
-    pub fn new<P>(bash_path: PathBuf, execve_wrapper: PathBuf, policy: P) -> Self
+    pub fn new<Policy>(bash_path: PathBuf, execve_wrapper: PathBuf, policy: Policy) -> Self
    where
-        P: EscalationPolicy + Send + Sync + 'static,
+        Policy: EscalationPolicy + Send + Sync + 'static,
    {
        Self {
            bash_path,
@@ -84,13 +104,17 @@ impl EscalateServer {
        &self,
        params: ExecParams,
        cancel_rx: CancellationToken,
-        command_executor: &dyn ShellCommandExecutor,
+        command_executor: Arc<dyn ShellCommandExecutor>,
    ) -> anyhow::Result<ExecResult> {
        let (escalate_server, escalate_client) = AsyncDatagramSocket::pair()?;
        let client_socket = escalate_client.into_inner();
        // Only the client endpoint should cross exec into the wrapper process.
        client_socket.set_cloexec(false)?;
-        let escalate_task = tokio::spawn(escalate_task(escalate_server, self.policy.clone()));
+        let escalate_task = tokio::spawn(escalate_task(
+            escalate_server,
+            Arc::clone(&self.policy),
+            Arc::clone(&command_executor),
+        ));
        let mut env = std::env::vars().collect::<HashMap<String, String>>();
        env.insert(
            ESCALATE_SOCKET_ENV_VAR.to_string(),
@@ -126,6 +150,7 @@ impl EscalateServer {
 async fn escalate_task(
    socket: AsyncDatagramSocket,
    policy: Arc<dyn EscalationPolicy>,
+    command_executor: Arc<dyn ShellCommandExecutor>,
 ) -> anyhow::Result<()> {
    loop {
        let (_, mut fds) = socket.receive_with_fds().await?;
@@ -134,9 +159,12 @@ async fn escalate_task(
            continue;
        }
        let stream_socket = AsyncSocket::from_fd(fds.remove(0))?;
-        let policy = policy.clone();
+        let policy = Arc::clone(&policy);
+        let command_executor = Arc::clone(&command_executor);
        tokio::spawn(async move {
-            if let Err(err) = handle_escalate_session_with_policy(stream_socket, policy).await {
+            if let Err(err) =
+                handle_escalate_session_with_policy(stream_socket, policy, command_executor).await
+            {
                tracing::error!("escalate session failed: {err:?}");
            }
        });
@@ -146,6 +174,7 @@ async fn escalate_task(
 async fn handle_escalate_session_with_policy(
    socket: AsyncSocket,
    policy: Arc<dyn EscalationPolicy>,
+    command_executor: Arc<dyn ShellCommandExecutor>,
 ) -> anyhow::Result<()> {
    let EscalateRequest {
        file,
@@ -154,22 +183,22 @@ async fn handle_escalate_session_with_policy(
        env,
    } = socket.receive::<EscalateRequest>().await?;
    let program = AbsolutePathBuf::resolve_path_against_base(file, workdir.as_path())?;
-    let action = policy
+    let decision = policy
        .determine_action(&program, &argv, &workdir)
        .await
        .context("failed to determine escalation action")?;

-    tracing::debug!("decided {action:?} for {program:?} {argv:?} {workdir:?}");
+    tracing::debug!("decided {decision:?} for {program:?} {argv:?} {workdir:?}");

-    match action {
-        EscalateAction::Run => {
+    match decision {
+        EscalationDecision::Run => {
            socket
                .send(EscalateResponse {
                    action: EscalateAction::Run,
                })
                .await?;
        }
-        EscalateAction::Escalate => {
+        EscalationDecision::Escalate(execution) => {
            socket
                .send(EscalateResponse {
                    action: EscalateAction::Escalate,
@@ -197,12 +226,23 @@ async fn handle_escalate_session_with_policy(
                ));
            }

-            let mut command = Command::new(program.as_path());
+            let PreparedExec {
+                command,
+                cwd,
+                env,
+                arg0,
+            } = command_executor
+                .prepare_escalated_exec(&program, &argv, &workdir, env, execution)
+                .await?;
+            let (program, args) = command
+                .split_first()
+                .ok_or_else(|| anyhow::anyhow!("prepared escalated command must not be empty"))?;
+            let mut command = Command::new(program);
            command
-                .args(&argv[1..])
-                .arg0(argv[0].clone())
+                .args(args)
+                .arg0(arg0.unwrap_or_else(|| program.clone()))
                .envs(&env)
-                .current_dir(&workdir)
+                .current_dir(&cwd)
                .stdin(Stdio::null())
                .stdout(Stdio::null())
                .stderr(Stdio::null());
@@ -222,7 +262,7 @@ async fn handle_escalate_session_with_policy(
                })
                .await?;
        }
-        EscalateAction::Deny { reason } => {
+        EscalationDecision::Deny { reason } => {
            socket
                .send(EscalateResponse {
                    action: EscalateAction::Deny { reason },
@@ -236,13 +276,15 @@ async fn handle_escalate_session_with_policy(
 #[cfg(test)]
 mod tests {
    use super::*;
+    use codex_protocol::approvals::EscalationPermissions;
+    use codex_protocol::models::PermissionProfile;
    use codex_utils_absolute_path::AbsolutePathBuf;
    use pretty_assertions::assert_eq;
    use std::collections::HashMap;
    use std::path::PathBuf;

    struct DeterministicEscalationPolicy {
-        action: EscalateAction,
+        decision: EscalationDecision,
    }

    #[async_trait::async_trait]
@@ -252,8 +294,8 @@ mod tests {
            _file: &AbsolutePathBuf,
            _argv: &[String],
            _workdir: &AbsolutePathBuf,
-        ) -> anyhow::Result<EscalateAction> {
-            Ok(self.action.clone())
+        ) -> anyhow::Result<EscalationDecision> {
+            Ok(self.decision.clone())
        }
    }

@@ -269,10 +311,82 @@ mod tests {
            file: &AbsolutePathBuf,
            _argv: &[String],
            workdir: &AbsolutePathBuf,
-        ) -> anyhow::Result<EscalateAction> {
+        ) -> anyhow::Result<EscalationDecision> {
            assert_eq!(file, &self.expected_file);
            assert_eq!(workdir, &self.expected_workdir);
-            Ok(EscalateAction::Run)
+            Ok(EscalationDecision::run())
+        }
+    }
+
+    struct ForwardingShellCommandExecutor;
+
+    #[async_trait::async_trait]
+    impl ShellCommandExecutor for ForwardingShellCommandExecutor {
+        async fn run(
+            &self,
+            _command: Vec<String>,
+            _cwd: PathBuf,
+            _env: HashMap<String, String>,
+            _cancel_rx: CancellationToken,
+        ) -> anyhow::Result<ExecResult> {
+            unreachable!("run() is not used by handle_escalate_session_with_policy() tests")
+        }
+
+        async fn prepare_escalated_exec(
+            &self,
+            program: &AbsolutePathBuf,
+            argv: &[String],
+            workdir: &AbsolutePathBuf,
+            env: HashMap<String, String>,
+            _execution: EscalationExecution,
+        ) -> anyhow::Result<PreparedExec> {
+            Ok(PreparedExec {
+                command: std::iter::once(program.to_string_lossy().to_string())
+                    .chain(argv.iter().skip(1).cloned())
+                    .collect(),
+                cwd: workdir.to_path_buf(),
+                env,
+                arg0: argv.first().cloned(),
+            })
+        }
+    }
+
+    struct PermissionAssertingShellCommandExecutor {
+        expected_permissions: EscalationPermissions,
+    }
+
+    #[async_trait::async_trait]
+    impl ShellCommandExecutor for PermissionAssertingShellCommandExecutor {
+        async fn run(
+            &self,
+            _command: Vec<String>,
+            _cwd: PathBuf,
+            _env: HashMap<String, String>,
+            _cancel_rx: CancellationToken,
+        ) -> anyhow::Result<ExecResult> {
+            unreachable!("run() is not used by handle_escalate_session_with_policy() tests")
+        }
+
+        async fn prepare_escalated_exec(
+            &self,
+            program: &AbsolutePathBuf,
+            argv: &[String],
+            workdir: &AbsolutePathBuf,
+            env: HashMap<String, String>,
+            execution: EscalationExecution,
+        ) -> anyhow::Result<PreparedExec> {
+            assert_eq!(
+                execution,
+                EscalationExecution::Permissions(self.expected_permissions.clone())
+            );
+            Ok(PreparedExec {
+                command: std::iter::once(program.to_string_lossy().to_string())
+                    .chain(argv.iter().skip(1).cloned())
+                    .collect(),
+                cwd: workdir.to_path_buf(),
+                env,
+                arg0: argv.first().cloned(),
+            })
        }
    }

@@ -282,8 +396,9 @@ mod tests {
        let server_task = tokio::spawn(handle_escalate_session_with_policy(
            server,
            Arc::new(DeterministicEscalationPolicy {
-                action: EscalateAction::Run,
+                decision: EscalationDecision::run(),
            }),
+            Arc::new(ForwardingShellCommandExecutor),
        ));

        let mut env = HashMap::new();
@@ -326,6 +441,7 @@ mod tests {
                expected_file,
                expected_workdir: workdir.clone(),
            }),
+            Arc::new(ForwardingShellCommandExecutor),
        ));

        client
@@ -353,8 +469,9 @@ mod tests {
        let server_task = tokio::spawn(handle_escalate_session_with_policy(
            server,
            Arc::new(DeterministicEscalationPolicy {
-                action: EscalateAction::Escalate,
+                decision: EscalationDecision::escalate(EscalationExecution::Unsandboxed),
            }),
+            Arc::new(ForwardingShellCommandExecutor),
        ));

        client
@@ -387,4 +504,52 @@ mod tests {

        server_task.await?
    }
+
+    #[tokio::test]
+    async fn handle_escalate_session_passes_permissions_to_executor() -> anyhow::Result<()> {
+        let (server, client) = AsyncSocket::pair()?;
+        let server_task = tokio::spawn(handle_escalate_session_with_policy(
+            server,
+            Arc::new(DeterministicEscalationPolicy {
+                decision: EscalationDecision::escalate(EscalationExecution::Permissions(
+                    EscalationPermissions::PermissionProfile(PermissionProfile {
+                        network: Some(true),
+                        ..Default::default()
+                    }),
+                )),
+            }),
+            Arc::new(PermissionAssertingShellCommandExecutor {
+                expected_permissions: EscalationPermissions::PermissionProfile(PermissionProfile {
+                    network: Some(true),
+                    ..Default::default()
+                }),
+            }),
+        ));
+
+        client
+            .send(EscalateRequest {
+                file: PathBuf::from("/bin/sh"),
+                argv: vec!["sh".to_string(), "-c".to_string(), "exit 0".to_string()],
+                workdir: AbsolutePathBuf::current_dir()?,
+                env: HashMap::new(),
+            })
+            .await?;
+
+        let response = client.receive::<EscalateResponse>().await?;
+        assert_eq!(
+            EscalateResponse {
+                action: EscalateAction::Escalate,
+            },
+            response
+        );
+
+        client
+            .send_with_fds(SuperExecMessage { fds: Vec::new() }, &[])
+            .await?;
+
+        let result = client.receive::<SuperExecResult>().await?;
+        assert_eq!(0, result.exit_code);
+
+        server_task.await?
+    }
 }
--- a/codex-rs/shell-escalation/src/unix/escalation_policy.rs
+++ b/codex-rs/shell-escalation/src/unix/escalation_policy.rs
@@ -1,6 +1,6 @@
 use codex_utils_absolute_path::AbsolutePathBuf;

-use crate::unix::escalate_protocol::EscalateAction;
+use crate::unix::escalate_protocol::EscalationDecision;

 /// Decides what action to take in response to an execve request from a client.
 #[async_trait::async_trait]
@@ -10,5 +10,5 @@ pub trait EscalationPolicy: Send + Sync {
        file: &AbsolutePathBuf,
        argv: &[String],
        workdir: &AbsolutePathBuf,
-    ) -> anyhow::Result<EscalateAction>;
+    ) -> anyhow::Result<EscalationDecision>;
 }
--- a/codex-rs/shell-escalation/src/unix/mod.rs
+++ b/codex-rs/shell-escalation/src/unix/mod.rs
@@ -63,10 +63,15 @@ pub mod stopwatch;

 pub use self::escalate_client::run_shell_escalation_execve_wrapper;
 pub use self::escalate_protocol::EscalateAction;
+pub use self::escalate_protocol::EscalationDecision;
+pub use self::escalate_protocol::EscalationExecution;
 pub use self::escalate_server::EscalateServer;
 pub use self::escalate_server::ExecParams;
 pub use self::escalate_server::ExecResult;
+pub use self::escalate_server::PreparedExec;
 pub use self::escalate_server::ShellCommandExecutor;
 pub use self::escalation_policy::EscalationPolicy;
 pub use self::execve_wrapper::main_execve_wrapper;
 pub use self::stopwatch::Stopwatch;
+pub use codex_protocol::approvals::EscalationPermissions;
+pub use codex_protocol::approvals::Permissions;