fix: preserve zsh-fork escalation fds across unified-exec spawn paths (#13644)

## Why

`zsh-fork` sessions launched through unified-exec need the escalation
socket to survive the wrapper -> server -> child handoff so later
intercepted `exec()` calls can still reach the escalation server.

The inherited-fd spawn path also needs to avoid closing Rust's internal
exec-error pipe, and the shell-escalation handoff needs to tolerate the
receive-side case where a transferred fd is installed into the same
stdio slot it will be mapped onto.

## What Changed

- Added `SpawnLifecycle::inherited_fds()` in
`codex-rs/core/src/unified_exec/process.rs` and threaded inherited fds
through `codex-rs/core/src/unified_exec/process_manager.rs` so
unified-exec can preserve required descriptors across both PTY and
no-stdin pipe spawn paths.
- Updated `codex-rs/core/src/tools/runtimes/shell/zsh_fork_backend.rs`
to expose the escalation socket fd through the spawn lifecycle.
- Added inherited-fd-aware spawn helpers in
`codex-rs/utils/pty/src/pty.rs` and `codex-rs/utils/pty/src/pipe.rs`,
including Unix pre-exec fd pruning that preserves requested inherited
fds while leaving `FD_CLOEXEC` descriptors alone. The pruning helper is
now named `close_inherited_fds_except()` to better describe that
behavior.
- Updated `codex-rs/shell-escalation/src/unix/escalate_client.rs` to
duplicate local stdio before transfer and send destination stdio numbers
in `SuperExecMessage`, so the wrapper keeps using its own
`stdin`/`stdout`/`stderr` until the escalated child takes over.
- Updated `codex-rs/shell-escalation/src/unix/escalate_server.rs` so the
server accepts the overlap case where a received fd reuses the same
stdio descriptor number that the child setup will target with `dup2`.
- Added comments around the PTY stdio wiring and the overlap regression
helper to make the fd handoff and controlling-terminal setup easier to
follow.

## Verification

- `cargo test -p codex-utils-pty`
  - covers preserved-fd PTY spawn behavior, PTY resize, Python REPL
    continuity, exec-failure reporting, and the no-stdin pipe path
- `cargo test -p codex-shell-escalation`
  - covers duplicated-fd transfer on the client side and verifies the
    overlap case by passing a pipe-backed stdin payload through the
    server-side `dup2` path

---
[//]: # (BEGIN SAPLING FOOTER)
Stack created with [Sapling](https://sapling-scm.com). Best reviewed
with [ReviewStack](https://reviewstack.dev/openai/codex/pull/13644).
* #14624
* __->__ #13644
This commit is contained in:
Michael Bolin
2026-03-13 13:25:31 -07:00
committed by GitHub
parent 014e19510d
commit ef37d313c6
10 changed files with 927 additions and 30 deletions

View File

@@ -1,6 +1,20 @@
use std::collections::HashMap;
#[cfg(unix)]
use std::fs::File;
use std::io::ErrorKind;
#[cfg(unix)]
use std::os::fd::AsRawFd;
#[cfg(unix)]
use std::os::fd::FromRawFd;
#[cfg(unix)]
use std::os::fd::RawFd;
#[cfg(unix)]
use std::os::unix::process::CommandExt;
use std::path::Path;
#[cfg(unix)]
use std::process::Command as StdCommand;
#[cfg(unix)]
use std::process::Stdio;
use std::sync::atomic::AtomicBool;
use std::sync::Arc;
use std::sync::Mutex as StdMutex;
@@ -17,6 +31,7 @@ use tokio::task::JoinHandle;
use crate::process::ChildTerminator;
use crate::process::ProcessHandle;
use crate::process::PtyHandles;
use crate::process::PtyMasterHandle;
use crate::process::SpawnedProcess;
use crate::process::TerminalSize;
@@ -59,6 +74,18 @@ impl ChildTerminator for PtyChildTerminator {
}
}
/// Terminator for a child that was spawned directly with
/// `std::process::Command` rather than through `portable_pty`.
///
/// The inherited-fd PTY spawn path in this file builds it from the spawned
/// child's pid. That child calls `setsid()` in its `pre_exec` hook, so the
/// pid is stored here as the id of the process group to kill.
#[cfg(unix)]
struct RawPidTerminator {
// Id handed to `kill_process_group` when the session is terminated.
process_group_id: u32,
}
#[cfg(unix)]
impl ChildTerminator for RawPidTerminator {
/// Terminates the child by delegating to
/// `crate::process_group::kill_process_group` with the stored group id and
/// returning that helper's result unchanged.
// NOTE(review): presumably this signals every process in the group, not
// just the leader — confirm against `kill_process_group`.
fn kill(&mut self) -> std::io::Result<()> {
crate::process_group::kill_process_group(self.process_group_id)
}
}
fn platform_native_pty_system() -> Box<dyn portable_pty::PtySystem + Send> {
#[cfg(windows)]
{
@@ -79,11 +106,45 @@ pub async fn spawn_process(
env: &HashMap<String, String>,
arg0: &Option<String>,
size: TerminalSize,
) -> Result<SpawnedProcess> {
spawn_process_with_inherited_fds(program, args, cwd, env, arg0, size, &[]).await
}
/// Launch `program` attached to a PTY, keeping every descriptor named in
/// `inherited_fds` open in the child across exec on Unix.
///
/// When `inherited_fds` is empty the portable spawn path is used. On
/// non-Unix targets the list is ignored and the portable path is always
/// taken.
///
/// # Errors
///
/// Fails with "missing program for PTY spawn" when `program` is empty;
/// otherwise propagates whatever the selected spawn path returns.
pub async fn spawn_process_with_inherited_fds(
    program: &str,
    args: &[String],
    cwd: &Path,
    env: &HashMap<String, String>,
    arg0: &Option<String>,
    size: TerminalSize,
    inherited_fds: &[i32],
) -> Result<SpawnedProcess> {
    if program.is_empty() {
        anyhow::bail!("missing program for PTY spawn");
    }

    // Only Unix has a spawn path that can keep extra descriptors alive.
    #[cfg(unix)]
    {
        if !inherited_fds.is_empty() {
            return spawn_process_preserving_fds(program, args, cwd, env, arg0, size, inherited_fds)
                .await;
        }
    }

    // Silence the unused-parameter warning where the list has no effect.
    #[cfg(not(unix))]
    let _ = inherited_fds;

    spawn_process_portable(program, args, cwd, env, arg0, size).await
}
async fn spawn_process_portable(
program: &str,
args: &[String],
cwd: &Path,
env: &HashMap<String, String>,
arg0: &Option<String>,
size: TerminalSize,
) -> Result<SpawnedProcess> {
let pty_system = platform_native_pty_system();
let pair = pty_system.openpty(size.into())?;
@@ -164,7 +225,7 @@ pub async fn spawn_process(
} else {
None
},
_master: pair.master,
_master: PtyMasterHandle::Resizable(pair.master),
};
let handle = ProcessHandle::new(
@@ -190,3 +251,231 @@ pub async fn spawn_process(
exit_rx,
})
}
#[cfg(unix)]
async fn spawn_process_preserving_fds(
program: &str,
args: &[String],
cwd: &Path,
env: &HashMap<String, String>,
arg0: &Option<String>,
size: TerminalSize,
inherited_fds: &[RawFd],
) -> Result<SpawnedProcess> {
let (master, slave) = open_unix_pty(size)?;
let mut command = StdCommand::new(program);
if let Some(arg0) = arg0 {
command.arg0(arg0);
}
command.current_dir(cwd);
command.env_clear();
for arg in args {
command.arg(arg);
}
for (key, value) in env {
command.env(key, value);
}
// The child should see one terminal on all three stdio streams. Cloning
// the slave fd gives us three owned handles to the same PTY slave device
// so Command can wire them up independently as stdin/stdout/stderr.
let stdin = slave.try_clone()?;
let stdout = slave.try_clone()?;
let stderr = slave.try_clone()?;
let inherited_fds = inherited_fds.to_vec();
unsafe {
command
.stdin(Stdio::from(stdin))
.stdout(Stdio::from(stdout))
.stderr(Stdio::from(stderr))
.pre_exec(move || {
for signo in &[
libc::SIGCHLD,
libc::SIGHUP,
libc::SIGINT,
libc::SIGQUIT,
libc::SIGTERM,
libc::SIGALRM,
] {
libc::signal(*signo, libc::SIG_DFL);
}
let empty_set: libc::sigset_t = std::mem::zeroed();
libc::sigprocmask(libc::SIG_SETMASK, &empty_set, std::ptr::null_mut());
if libc::setsid() == -1 {
return Err(std::io::Error::last_os_error());
}
// stdin now refers to the PTY slave, so make that fd the
// controlling terminal for the child's new session. stdout and
// stderr point at clones of the same slave device.
#[allow(clippy::cast_lossless)]
if libc::ioctl(0, libc::TIOCSCTTY as _, 0) == -1 {
return Err(std::io::Error::last_os_error());
}
close_inherited_fds_except(&inherited_fds);
Ok(())
});
}
let mut child = command.spawn()?;
drop(slave);
let process_group_id = child.id();
let (writer_tx, mut writer_rx) = mpsc::channel::<Vec<u8>>(128);
let (stdout_tx, stdout_rx) = mpsc::channel::<Vec<u8>>(128);
let (_stderr_tx, stderr_rx) = mpsc::channel::<Vec<u8>>(1);
let mut reader = master.try_clone()?;
let reader_handle: JoinHandle<()> = tokio::task::spawn_blocking(move || {
let mut buf = [0u8; 8_192];
loop {
match std::io::Read::read(&mut reader, &mut buf) {
Ok(0) => break,
Ok(n) => {
let _ = stdout_tx.blocking_send(buf[..n].to_vec());
}
Err(ref e) if e.kind() == ErrorKind::Interrupted => continue,
Err(ref e) if e.kind() == ErrorKind::WouldBlock => {
std::thread::sleep(Duration::from_millis(5));
continue;
}
Err(_) => break,
}
}
});
let writer = Arc::new(tokio::sync::Mutex::new(master.try_clone()?));
let writer_handle: JoinHandle<()> = tokio::spawn({
let writer = Arc::clone(&writer);
async move {
while let Some(bytes) = writer_rx.recv().await {
let mut guard = writer.lock().await;
use std::io::Write;
let _ = guard.write_all(&bytes);
let _ = guard.flush();
}
}
});
let (exit_tx, exit_rx) = oneshot::channel::<i32>();
let exit_status = Arc::new(AtomicBool::new(false));
let wait_exit_status = Arc::clone(&exit_status);
let exit_code = Arc::new(StdMutex::new(None));
let wait_exit_code = Arc::clone(&exit_code);
let wait_handle: JoinHandle<()> = tokio::task::spawn_blocking(move || {
let code = match child.wait() {
Ok(status) => status.code().unwrap_or(-1),
Err(_) => -1,
};
wait_exit_status.store(true, std::sync::atomic::Ordering::SeqCst);
if let Ok(mut guard) = wait_exit_code.lock() {
*guard = Some(code);
}
let _ = exit_tx.send(code);
});
let handles = PtyHandles {
_slave: None,
_master: PtyMasterHandle::Opaque {
raw_fd: master.as_raw_fd(),
_handle: Box::new(master),
},
};
let handle = ProcessHandle::new(
writer_tx,
Box::new(RawPidTerminator { process_group_id }),
reader_handle,
Vec::new(),
writer_handle,
wait_handle,
exit_status,
exit_code,
Some(handles),
);
Ok(SpawnedProcess {
session: handle,
stdout_rx,
stderr_rx,
exit_rx,
})
}
#[cfg(unix)]
fn open_unix_pty(size: TerminalSize) -> Result<(File, File)> {
let mut master: RawFd = -1;
let mut slave: RawFd = -1;
let mut size = libc::winsize {
ws_row: size.rows,
ws_col: size.cols,
ws_xpixel: 0,
ws_ypixel: 0,
};
let winp = std::ptr::addr_of_mut!(size);
let result = unsafe {
libc::openpty(
&mut master,
&mut slave,
std::ptr::null_mut(),
std::ptr::null_mut(),
winp,
)
};
if result != 0 {
anyhow::bail!("failed to openpty: {:?}", std::io::Error::last_os_error());
}
set_cloexec(master)?;
set_cloexec(slave)?;
Ok(unsafe { (File::from_raw_fd(master), File::from_raw_fd(slave)) })
}
/// Turn on `FD_CLOEXEC` for `fd` while leaving its other descriptor flags
/// as they were.
///
/// # Errors
///
/// Returns the OS error if reading or updating the descriptor flags fails.
#[cfg(unix)]
fn set_cloexec(fd: RawFd) -> std::io::Result<()> {
    // SAFETY: `fcntl(F_GETFD)` only queries descriptor flags; an invalid
    // `fd` is reported through the `-1` return value.
    let current = unsafe { libc::fcntl(fd, libc::F_GETFD) };
    if current == -1 {
        return Err(std::io::Error::last_os_error());
    }

    // SAFETY: `fcntl(F_SETFD)` only updates descriptor flags for `fd`.
    let updated = unsafe { libc::fcntl(fd, libc::F_SETFD, current | libc::FD_CLOEXEC) };
    match updated {
        -1 => Err(std::io::Error::last_os_error()),
        _ => Ok(()),
    }
}
/// Close every open descriptor listed in `/dev/fd` except:
///
/// * stdio (`0`, `1`, `2`),
/// * anything named in `preserved_fds`, and
/// * descriptors that already carry `FD_CLOEXEC` (or whose flags cannot be
///   read).
///
/// Does nothing when `/dev/fd` cannot be listed. Errors from `close` are
/// ignored.
#[cfg(unix)]
pub(crate) fn close_inherited_fds_except(preserved_fds: &[RawFd]) {
    let entries = match std::fs::read_dir("/dev/fd") {
        Ok(entries) => entries,
        Err(_) => return,
    };

    // Gather the candidates first and close them only once the directory
    // listing has been fully consumed.
    let doomed: Vec<RawFd> = entries
        .filter_map(|entry| {
            entry
                .ok()?
                .file_name()
                .into_string()
                .ok()?
                .parse::<RawFd>()
                .ok()
        })
        .filter(|fd| *fd > 2 && !preserved_fds.contains(fd))
        .filter(|&fd| {
            // Keep CLOEXEC descriptors open so std::process can still use
            // its internal exec-error pipe to report spawn failures.
            // SAFETY: `fcntl(F_GETFD)` only queries descriptor flags.
            let flags = unsafe { libc::fcntl(fd, libc::F_GETFD) };
            flags != -1 && flags & libc::FD_CLOEXEC == 0
        })
        .collect();

    for fd in doomed {
        // SAFETY: `fd` was listed as open, is not stdio, is not preserved by
        // the caller, and is not close-on-exec.
        unsafe {
            libc::close(fd);
        }
    }
}