mirror of
https://github.com/openai/codex.git
synced 2026-04-28 18:32:04 +03:00
4.1 KiB
4.1 KiB
PR #1852: exec: timeout on grandchildren
- URL: https://github.com/openai/codex/pull/1852
- Author: md-oai
- Created: 2025-08-05 18:04:58 UTC
- Updated: 2025-08-05 21:18:43 UTC
- Changes: +43/-2, Files changed: 1, Commits: 3
Description
We were enforcing the 10 s wall-clock limit only on the child process. If that child (bash) spawns grandchildren and we kill it on timeout, those grandchildren still have the original stdout/stderr pipes open, so the background tasks that are draining the pipes block forever.
Full Diff
diff --git a/codex-rs/core/src/exec.rs b/codex-rs/core/src/exec.rs
index dce02cc5e2..a8bbd0f52c 100644
--- a/codex-rs/core/src/exec.rs
+++ b/codex-rs/core/src/exec.rs
@@ -344,8 +344,49 @@ pub(crate) async fn consume_truncated_output(
}
};
- let stdout = stdout_handle.await??;
- let stderr = stderr_handle.await??;
+ // Wait for the stdout/stderr collection tasks but guard against them
+ // hanging forever. In the normal case both pipes are closed once the child
+ // terminates so the tasks exit quickly. However, if the child process
+ // spawned grandchildren that inherited its stdout/stderr file descriptors
+ // those pipes may stay open after we `kill` the direct child on timeout.
+ // That would cause the `read_capped` tasks to block on `read()`
+ // indefinitely, effectively hanging the whole agent.
+
+ const IO_DRAIN_TIMEOUT_MS: u64 = 2_000; // 2 s should be plenty for local pipes
+
+ // We need mutable bindings so we can `abort()` them on timeout.
+ use tokio::task::JoinHandle;
+
+ async fn await_with_timeout(
+ handle: &mut JoinHandle<std::io::Result<Vec<u8>>>,
+ timeout: Duration,
+ ) -> std::io::Result<Vec<u8>> {
+ match tokio::time::timeout(timeout, &mut *handle).await {
+ Ok(join_res) => match join_res {
+ Ok(io_res) => io_res,
+ Err(join_err) => Err(std::io::Error::other(join_err)),
+ },
+ Err(_elapsed) => {
+ // Timeout: abort the task to avoid hanging on open pipes.
+ handle.abort();
+ Ok(Vec::new())
+ }
+ }
+ }
+
+ let mut stdout_handle = stdout_handle;
+ let mut stderr_handle = stderr_handle;
+
+ let stdout = await_with_timeout(
+ &mut stdout_handle,
+ Duration::from_millis(IO_DRAIN_TIMEOUT_MS),
+ )
+ .await?;
+ let stderr = await_with_timeout(
+ &mut stderr_handle,
+ Duration::from_millis(IO_DRAIN_TIMEOUT_MS),
+ )
+ .await?;
Ok(RawExecToolCallOutput {
exit_status,
Review Comments
codex-rs/core/src/exec.rs
- Created: 2025-08-05 18:42:23 UTC | Link: https://github.com/openai/codex/pull/1852#discussion_r2255069686
@@ -344,8 +344,50 @@ pub(crate) async fn consume_truncated_output(
}
};
- let stdout = stdout_handle.await??;
- let stderr = stderr_handle.await??;
+ // Wait for the stdout/stderr collection tasks but guard against them
+ // hanging forever. In the normal case both pipes are closed once the child
+ // terminates so the tasks exit quickly. However, if the child process
+ // spawned grandchildren that inherited its stdout/stderr file descriptors
+ // those pipes may stay open after we `kill` the direct child on timeout.
+ // That would cause the `read_capped` tasks to block on `read()`
+ // indefinitely, effectively hanging the whole agent.
+
+ const IO_DRAIN_TIMEOUT_MS: u64 = 2_000; // 2 s should be plenty for local pipes
+
+ // We need mutable bindings so we can `abort()` them on timeout.
+ use tokio::task::JoinHandle;
+
+ async fn await_with_timeout(
+ handle: &mut JoinHandle<std::io::Result<Vec<u8>>>,
+ timeout: Duration,
+ ) -> std::io::Result<Vec<u8>> {
+ tokio::select! {
+ join_res = &mut *handle => {
+ match join_res {
+ Ok(io_res) => io_res,
+ Err(join_err) => Err(std::io::Error::other(join_err)),
+ }
+ },
+ _ = tokio::time::sleep(timeout) => {
Can/should we use `use tokio::time::timeout` instead?