fix: preserve zsh-fork escalation fds across unified-exec spawn paths (#13644)

## Why

`zsh-fork` sessions launched through unified-exec need the escalation
socket to survive the wrapper -> server -> child handoff so later
intercepted `exec()` calls can still reach the escalation server.

The inherited-fd spawn path also needs to avoid closing Rust's internal
exec-error pipe, and the shell-escalation handoff needs to tolerate the
receive-side case where a transferred fd is installed into the same
stdio slot it will be mapped onto.

## What Changed

- Added `SpawnLifecycle::inherited_fds()` in
`codex-rs/core/src/unified_exec/process.rs` and threaded inherited fds
through `codex-rs/core/src/unified_exec/process_manager.rs` so
unified-exec can preserve required descriptors across both PTY and
no-stdin pipe spawn paths.
- Updated `codex-rs/core/src/tools/runtimes/shell/zsh_fork_backend.rs`
to expose the escalation socket fd through the spawn lifecycle.
- Added inherited-fd-aware spawn helpers in
`codex-rs/utils/pty/src/pty.rs` and `codex-rs/utils/pty/src/pipe.rs`,
including Unix pre-exec fd pruning that preserves requested inherited
fds while leaving `FD_CLOEXEC` descriptors alone. The pruning helper is
now named `close_inherited_fds_except()` to better describe that
behavior.
- Updated `codex-rs/shell-escalation/src/unix/escalate_client.rs` to
duplicate local stdio before transfer and send destination stdio numbers
in `SuperExecMessage`, so the wrapper keeps using its own
`stdin`/`stdout`/`stderr` until the escalated child takes over.
- Updated `codex-rs/shell-escalation/src/unix/escalate_server.rs` so the
server accepts the overlap case where a received fd reuses the same
stdio descriptor number that the child setup will target with `dup2`.
- Added comments around the PTY stdio wiring and the overlap regression
helper to make the fd handoff and controlling-terminal setup easier to
follow.

## Verification

- `cargo test -p codex-utils-pty`
- covers preserved-fd PTY spawn behavior, PTY resize, Python REPL
continuity, exec-failure reporting, and the no-stdin pipe path
- `cargo test -p codex-shell-escalation`
- covers duplicated-fd transfer on the client side and verifies the
overlap case by passing a pipe-backed stdin payload through the
server-side `dup2` path

---
[//]: # (BEGIN SAPLING FOOTER)
Stack created with [Sapling](https://sapling-scm.com). Best reviewed
with [ReviewStack](https://reviewstack.dev/openai/codex/pull/13644).
* #14624
* __->__ #13644
This commit is contained in:
Michael Bolin
2026-03-13 13:25:31 -07:00
committed by GitHub
parent 014e19510d
commit ef37d313c6
10 changed files with 927 additions and 30 deletions

View File

@@ -28,6 +28,8 @@ pub use unix::ShellCommandExecutor;
#[cfg(unix)]
pub use unix::Stopwatch;
#[cfg(unix)]
pub use unix::escalate_protocol::ESCALATE_SOCKET_ENV_VAR;
#[cfg(unix)]
pub use unix::main_execve_wrapper;
#[cfg(unix)]
pub use unix::run_shell_escalation_execve_wrapper;

View File

@@ -1,6 +1,6 @@
use std::io;
use std::os::fd::AsFd;
use std::os::fd::AsRawFd;
use std::os::fd::FromRawFd as _;
use std::os::fd::OwnedFd;
use anyhow::Context as _;
@@ -28,6 +28,12 @@ fn get_escalate_client() -> anyhow::Result<AsyncDatagramSocket> {
Ok(unsafe { AsyncDatagramSocket::from_raw_fd(client_fd) }?)
}
fn duplicate_fd_for_transfer(fd: impl AsFd, name: &str) -> anyhow::Result<OwnedFd> {
fd.as_fd()
.try_clone_to_owned()
.with_context(|| format!("failed to duplicate {name} for escalation transfer"))
}
pub async fn run_shell_escalation_execve_wrapper(
file: String,
argv: Vec<String>,
@@ -62,11 +68,18 @@ pub async fn run_shell_escalation_execve_wrapper(
.context("failed to receive EscalateResponse")?;
match message.action {
EscalateAction::Escalate => {
// TODO: maybe we should send ALL open FDs (except the escalate client)?
// Duplicate stdio before transferring ownership to the server. The
// wrapper must keep using its own stdin/stdout/stderr until the
// escalated child takes over.
let destination_fds = [
io::stdin().as_raw_fd(),
io::stdout().as_raw_fd(),
io::stderr().as_raw_fd(),
];
let fds_to_send = [
unsafe { OwnedFd::from_raw_fd(io::stdin().as_raw_fd()) },
unsafe { OwnedFd::from_raw_fd(io::stdout().as_raw_fd()) },
unsafe { OwnedFd::from_raw_fd(io::stderr().as_raw_fd()) },
duplicate_fd_for_transfer(io::stdin(), "stdin")?,
duplicate_fd_for_transfer(io::stdout(), "stdout")?,
duplicate_fd_for_transfer(io::stderr(), "stderr")?,
];
// TODO: also forward signals over the super-exec socket
@@ -74,7 +87,7 @@ pub async fn run_shell_escalation_execve_wrapper(
client
.send_with_fds(
SuperExecMessage {
fds: fds_to_send.iter().map(AsRawFd::as_raw_fd).collect(),
fds: destination_fds.into_iter().collect(),
},
&fds_to_send,
)
@@ -115,3 +128,23 @@ pub async fn run_shell_escalation_execve_wrapper(
}
}
}
#[cfg(test)]
mod tests {
use super::*;
use std::os::fd::AsRawFd;
use std::os::unix::net::UnixStream;
#[test]
fn duplicate_fd_for_transfer_does_not_close_original() {
let (left, _right) = UnixStream::pair().expect("socket pair");
let original_fd = left.as_raw_fd();
let duplicate = duplicate_fd_for_transfer(&left, "test fd").expect("duplicate fd");
assert_ne!(duplicate.as_raw_fd(), original_fd);
drop(duplicate);
assert_ne!(unsafe { libc::fcntl(original_fd, libc::F_GETFD) }, -1);
}
}

View File

@@ -319,16 +319,6 @@ async fn handle_escalate_session_with_policy(
));
}
if msg
.fds
.iter()
.any(|src_fd| fds.iter().any(|dst_fd| dst_fd.as_raw_fd() == *src_fd))
{
return Err(anyhow::anyhow!(
"overlapping fds not yet supported in SuperExecMessage"
));
}
let PreparedExec {
command,
cwd,
@@ -398,6 +388,7 @@ mod tests {
use codex_utils_absolute_path::AbsolutePathBuf;
use pretty_assertions::assert_eq;
use std::collections::HashMap;
use std::io::Write;
use std::os::fd::AsRawFd;
use std::os::fd::FromRawFd;
use std::path::PathBuf;
@@ -812,6 +803,126 @@ mod tests {
server_task.await?
}
/// Saves a target descriptor, closes it, and restores it when dropped.
///
/// The overlap regression test needs the next received `SCM_RIGHTS` handle
/// to land on a specific descriptor number such as stdin. Temporarily
/// closing the descriptor makes that allocation possible while still
/// letting the test put the process back the way it found it.
struct RestoredFd {
target_fd: i32,
original_fd: std::os::fd::OwnedFd,
}
impl RestoredFd {
/// Duplicates `target_fd`, then closes the original descriptor number.
///
/// The duplicate is kept alive so `Drop` can restore the original
/// process state after the test finishes.
fn close_temporarily(target_fd: i32) -> anyhow::Result<Self> {
let original_fd = unsafe { libc::dup(target_fd) };
if original_fd == -1 {
return Err(std::io::Error::last_os_error().into());
}
if unsafe { libc::close(target_fd) } == -1 {
let err = std::io::Error::last_os_error();
unsafe {
libc::close(original_fd);
}
return Err(err.into());
}
Ok(Self {
target_fd,
original_fd: unsafe { std::os::fd::OwnedFd::from_raw_fd(original_fd) },
})
}
}
/// Restores the original descriptor back onto its original fd number.
///
/// This keeps the overlap test self-contained even though it mutates the
/// current process's stdio table.
impl Drop for RestoredFd {
fn drop(&mut self) {
unsafe {
libc::dup2(self.original_fd.as_raw_fd(), self.target_fd);
}
}
}
#[tokio::test]
async fn handle_escalate_session_accepts_received_fds_that_overlap_destinations()
-> anyhow::Result<()> {
let _guard = ESCALATE_SERVER_TEST_LOCK.lock().await;
let mut pipe_fds = [0; 2];
if unsafe { libc::pipe(pipe_fds.as_mut_ptr()) } == -1 {
return Err(std::io::Error::last_os_error().into());
}
let read_end = unsafe { std::os::fd::OwnedFd::from_raw_fd(pipe_fds[0]) };
let mut write_end = unsafe { std::fs::File::from_raw_fd(pipe_fds[1]) };
// Force the receive-side overlap case for stdin.
//
// SCM_RIGHTS installs received descriptors into the lowest available fd
// numbers in the receiving process. The pipe is opened first so its
// read end does not consume fd 0. After stdin is temporarily closed,
// receiving `read_end` should reuse descriptor 0. The message below
// also asks the server to map that received fd to destination fd 0, so
// the pre-exec dup2 loop exercises the src_fd == dst_fd case.
let stdin_restore = RestoredFd::close_temporarily(libc::STDIN_FILENO)?;
let (server, client) = AsyncSocket::pair()?;
let server_task = tokio::spawn(handle_escalate_session_with_policy(
server,
Arc::new(DeterministicEscalationPolicy {
decision: EscalationDecision::escalate(EscalationExecution::Unsandboxed),
}),
Arc::new(ForwardingShellCommandExecutor),
CancellationToken::new(),
CancellationToken::new(),
));
client
.send(EscalateRequest {
file: PathBuf::from("/bin/sh"),
argv: vec![
"sh".to_string(),
"-c".to_string(),
"IFS= read -r line && [ \"$line\" = overlap-ok ]".to_string(),
],
workdir: AbsolutePathBuf::current_dir()?,
env: HashMap::new(),
})
.await?;
let response = client.receive::<EscalateResponse>().await?;
assert_eq!(
EscalateResponse {
action: EscalateAction::Escalate,
},
response
);
client
.send_with_fds(
SuperExecMessage {
fds: vec![libc::STDIN_FILENO],
},
&[read_end],
)
.await?;
write_end.write_all(b"overlap-ok\n")?;
drop(write_end);
let result = client.receive::<SuperExecResult>().await?;
assert_eq!(
0, result.exit_code,
"expected the escalated child to read the sent stdin payload even when the received fd reuses fd 0"
);
drop(stdin_restore);
server_task.await?
}
#[tokio::test]
async fn handle_escalate_session_passes_permissions_to_executor() -> anyhow::Result<()> {
let _guard = ESCALATE_SERVER_TEST_LOCK.lock().await;