fix: preserve zsh-fork escalation fds across unified-exec spawn paths (#13644)

## Why `zsh-fork` sessions launched through unified-exec need the escalation socket to survive the wrapper -> server -> child handoff so later intercepted `exec()` calls can still reach the escalation server. The inherited-fd spawn path also needs to avoid closing Rust's internal exec-error pipe, and the shell-escalation handoff needs to tolerate the receive-side case where a transferred fd is installed into the same stdio slot it will be mapped onto. ## What Changed - Added `SpawnLifecycle::inherited_fds()` in `codex-rs/core/src/unified_exec/process.rs` and threaded inherited fds through `codex-rs/core/src/unified_exec/process_manager.rs` so unified-exec can preserve required descriptors across both PTY and no-stdin pipe spawn paths. - Updated `codex-rs/core/src/tools/runtimes/shell/zsh_fork_backend.rs` to expose the escalation socket fd through the spawn lifecycle. - Added inherited-fd-aware spawn helpers in `codex-rs/utils/pty/src/pty.rs` and `codex-rs/utils/pty/src/pipe.rs`, including Unix pre-exec fd pruning that preserves requested inherited fds while leaving `FD_CLOEXEC` descriptors alone. The pruning helper is now named `close_inherited_fds_except()` to better describe that behavior. - Updated `codex-rs/shell-escalation/src/unix/escalate_client.rs` to duplicate local stdio before transfer and send destination stdio numbers in `SuperExecMessage`, so the wrapper keeps using its own `stdin`/`stdout`/`stderr` until the escalated child takes over. - Updated `codex-rs/shell-escalation/src/unix/escalate_server.rs` so the server accepts the overlap case where a received fd reuses the same stdio descriptor number that the child setup will target with `dup2`. - Added comments around the PTY stdio wiring and the overlap regression helper to make the fd handoff and controlling-terminal setup easier to follow. ## Verification - `cargo test -p codex-utils-pty` - covers preserved-fd PTY spawn behavior, PTY resize, Python REPL continuity, exec-failure reporting, and the no-stdin pipe path - `cargo test -p codex-shell-escalation` - covers duplicated-fd transfer on the client side and verifies the overlap case by passing a pipe-backed stdin payload through the server-side `dup2` path --- [//]: # (BEGIN SAPLING FOOTER) Stack created with [Sapling](https://sapling-scm.com). Best reviewed with [ReviewStack](https://reviewstack.dev/openai/codex/pull/13644). * #14624 * __->__ #13644
2026-05-06 06:12:59 +03:00 · 2026-03-13 13:25:31 -07:00
parent 014e19510d
commit ef37d313c6
10 changed files with 927 additions and 30 deletions
--- a/codex-rs/shell-escalation/src/lib.rs
+++ b/codex-rs/shell-escalation/src/lib.rs
@@ -28,6 +28,8 @@ pub use unix::ShellCommandExecutor;
 #[cfg(unix)]
 pub use unix::Stopwatch;
 #[cfg(unix)]
+pub use unix::escalate_protocol::ESCALATE_SOCKET_ENV_VAR;
+#[cfg(unix)]
 pub use unix::main_execve_wrapper;
 #[cfg(unix)]
 pub use unix::run_shell_escalation_execve_wrapper;
--- a/codex-rs/shell-escalation/src/unix/escalate_client.rs
+++ b/codex-rs/shell-escalation/src/unix/escalate_client.rs
@@ -1,6 +1,6 @@
 use std::io;
+use std::os::fd::AsFd;
 use std::os::fd::AsRawFd;
-use std::os::fd::FromRawFd as _;
 use std::os::fd::OwnedFd;

 use anyhow::Context as _;
@@ -28,6 +28,12 @@ fn get_escalate_client() -> anyhow::Result<AsyncDatagramSocket> {
    Ok(unsafe { AsyncDatagramSocket::from_raw_fd(client_fd) }?)
 }

+fn duplicate_fd_for_transfer(fd: impl AsFd, name: &str) -> anyhow::Result<OwnedFd> {
+    fd.as_fd()
+        .try_clone_to_owned()
+        .with_context(|| format!("failed to duplicate {name} for escalation transfer"))
+}
+
 pub async fn run_shell_escalation_execve_wrapper(
    file: String,
    argv: Vec<String>,
@@ -62,11 +68,18 @@ pub async fn run_shell_escalation_execve_wrapper(
        .context("failed to receive EscalateResponse")?;
    match message.action {
        EscalateAction::Escalate => {
-            // TODO: maybe we should send ALL open FDs (except the escalate client)?
+            // Duplicate stdio before transferring ownership to the server. The
+            // wrapper must keep using its own stdin/stdout/stderr until the
+            // escalated child takes over.
+            let destination_fds = [
+                io::stdin().as_raw_fd(),
+                io::stdout().as_raw_fd(),
+                io::stderr().as_raw_fd(),
+            ];
            let fds_to_send = [
-                unsafe { OwnedFd::from_raw_fd(io::stdin().as_raw_fd()) },
-                unsafe { OwnedFd::from_raw_fd(io::stdout().as_raw_fd()) },
-                unsafe { OwnedFd::from_raw_fd(io::stderr().as_raw_fd()) },
+                duplicate_fd_for_transfer(io::stdin(), "stdin")?,
+                duplicate_fd_for_transfer(io::stdout(), "stdout")?,
+                duplicate_fd_for_transfer(io::stderr(), "stderr")?,
            ];

            // TODO: also forward signals over the super-exec socket
@@ -74,7 +87,7 @@ pub async fn run_shell_escalation_execve_wrapper(
            client
                .send_with_fds(
                    SuperExecMessage {
-                        fds: fds_to_send.iter().map(AsRawFd::as_raw_fd).collect(),
+                        fds: destination_fds.into_iter().collect(),
                    },
                    &fds_to_send,
                )
@@ -115,3 +128,23 @@ pub async fn run_shell_escalation_execve_wrapper(
        }
    }
 }
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+    use std::os::fd::AsRawFd;
+    use std::os::unix::net::UnixStream;
+
+    #[test]
+    fn duplicate_fd_for_transfer_does_not_close_original() {
+        let (left, _right) = UnixStream::pair().expect("socket pair");
+        let original_fd = left.as_raw_fd();
+
+        let duplicate = duplicate_fd_for_transfer(&left, "test fd").expect("duplicate fd");
+        assert_ne!(duplicate.as_raw_fd(), original_fd);
+
+        drop(duplicate);
+
+        assert_ne!(unsafe { libc::fcntl(original_fd, libc::F_GETFD) }, -1);
+    }
+}
--- a/codex-rs/shell-escalation/src/unix/escalate_server.rs
+++ b/codex-rs/shell-escalation/src/unix/escalate_server.rs
@@ -319,16 +319,6 @@ async fn handle_escalate_session_with_policy(
                ));
            }

-            if msg
-                .fds
-                .iter()
-                .any(|src_fd| fds.iter().any(|dst_fd| dst_fd.as_raw_fd() == *src_fd))
-            {
-                return Err(anyhow::anyhow!(
-                    "overlapping fds not yet supported in SuperExecMessage"
-                ));
-            }
-
            let PreparedExec {
                command,
                cwd,
@@ -398,6 +388,7 @@ mod tests {
    use codex_utils_absolute_path::AbsolutePathBuf;
    use pretty_assertions::assert_eq;
    use std::collections::HashMap;
+    use std::io::Write;
    use std::os::fd::AsRawFd;
    use std::os::fd::FromRawFd;
    use std::path::PathBuf;
@@ -812,6 +803,126 @@ mod tests {
        server_task.await?
    }

+    /// Saves a target descriptor, closes it, and restores it when dropped.
+    ///
+    /// The overlap regression test needs the next received `SCM_RIGHTS` handle
+    /// to land on a specific descriptor number such as stdin. Temporarily
+    /// closing the descriptor makes that allocation possible while still
+    /// letting the test put the process back the way it found it.
+    struct RestoredFd {
+        target_fd: i32,
+        original_fd: std::os::fd::OwnedFd,
+    }
+
+    impl RestoredFd {
+        /// Duplicates `target_fd`, then closes the original descriptor number.
+        ///
+        /// The duplicate is kept alive so `Drop` can restore the original
+        /// process state after the test finishes.
+        fn close_temporarily(target_fd: i32) -> anyhow::Result<Self> {
+            let original_fd = unsafe { libc::dup(target_fd) };
+            if original_fd == -1 {
+                return Err(std::io::Error::last_os_error().into());
+            }
+            if unsafe { libc::close(target_fd) } == -1 {
+                let err = std::io::Error::last_os_error();
+                unsafe {
+                    libc::close(original_fd);
+                }
+                return Err(err.into());
+            }
+            Ok(Self {
+                target_fd,
+                original_fd: unsafe { std::os::fd::OwnedFd::from_raw_fd(original_fd) },
+            })
+        }
+    }
+
+    /// Restores the original descriptor back onto its original fd number.
+    ///
+    /// This keeps the overlap test self-contained even though it mutates the
+    /// current process's stdio table.
+    impl Drop for RestoredFd {
+        fn drop(&mut self) {
+            unsafe {
+                libc::dup2(self.original_fd.as_raw_fd(), self.target_fd);
+            }
+        }
+    }
+
+    #[tokio::test]
+    async fn handle_escalate_session_accepts_received_fds_that_overlap_destinations()
+    -> anyhow::Result<()> {
+        let _guard = ESCALATE_SERVER_TEST_LOCK.lock().await;
+        let mut pipe_fds = [0; 2];
+        if unsafe { libc::pipe(pipe_fds.as_mut_ptr()) } == -1 {
+            return Err(std::io::Error::last_os_error().into());
+        }
+        let read_end = unsafe { std::os::fd::OwnedFd::from_raw_fd(pipe_fds[0]) };
+        let mut write_end = unsafe { std::fs::File::from_raw_fd(pipe_fds[1]) };
+
+        // Force the receive-side overlap case for stdin.
+        //
+        // SCM_RIGHTS installs received descriptors into the lowest available fd
+        // numbers in the receiving process. The pipe is opened first so its
+        // read end does not consume fd 0. After stdin is temporarily closed,
+        // receiving `read_end` should reuse descriptor 0. The message below
+        // also asks the server to map that received fd to destination fd 0, so
+        // the pre-exec dup2 loop exercises the src_fd == dst_fd case.
+        let stdin_restore = RestoredFd::close_temporarily(libc::STDIN_FILENO)?;
+        let (server, client) = AsyncSocket::pair()?;
+        let server_task = tokio::spawn(handle_escalate_session_with_policy(
+            server,
+            Arc::new(DeterministicEscalationPolicy {
+                decision: EscalationDecision::escalate(EscalationExecution::Unsandboxed),
+            }),
+            Arc::new(ForwardingShellCommandExecutor),
+            CancellationToken::new(),
+            CancellationToken::new(),
+        ));
+
+        client
+            .send(EscalateRequest {
+                file: PathBuf::from("/bin/sh"),
+                argv: vec![
+                    "sh".to_string(),
+                    "-c".to_string(),
+                    "IFS= read -r line && [ \"$line\" = overlap-ok ]".to_string(),
+                ],
+                workdir: AbsolutePathBuf::current_dir()?,
+                env: HashMap::new(),
+            })
+            .await?;
+
+        let response = client.receive::<EscalateResponse>().await?;
+        assert_eq!(
+            EscalateResponse {
+                action: EscalateAction::Escalate,
+            },
+            response
+        );
+
+        client
+            .send_with_fds(
+                SuperExecMessage {
+                    fds: vec![libc::STDIN_FILENO],
+                },
+                &[read_end],
+            )
+            .await?;
+        write_end.write_all(b"overlap-ok\n")?;
+        drop(write_end);
+
+        let result = client.receive::<SuperExecResult>().await?;
+        assert_eq!(
+            0, result.exit_code,
+            "expected the escalated child to read the sent stdin payload even when the received fd reuses fd 0"
+        );
+        drop(stdin_restore);
+
+        server_task.await?
+    }
+
    #[tokio::test]
    async fn handle_escalate_session_passes_permissions_to_executor() -> anyhow::Result<()> {
        let _guard = ESCALATE_SERVER_TEST_LOCK.lock().await;