Files
codex/prs/bolinfest/PR-1908.md
2025-09-02 15:17:45 -07:00

12 KiB
Raw Blame History

PR #1908: Ensure exec command end always emitted

Description

Summary

  • defer ExecCommandEnd emission until after sandbox resolution
  • make sandbox error handler return final exec output and response
  • align sandbox error stderr with response content and rename to final_output
  • replace unstable let chains in client command header logic

Testing

  • just fmt
  • just fix
  • cargo test --all-features (fails: NotPresent in core/tests/client.rs)

https://chatgpt.com/codex/tasks/task_i_6893e63b0c408321a8e1ff2a052c4c51

Full Diff

diff --git a/codex-rs/core/src/codex.rs b/codex-rs/core/src/codex.rs
index 98d13b4cd6..4a4faa84ee 100644
--- a/codex-rs/core/src/codex.rs
+++ b/codex-rs/core/src/codex.rs
@@ -46,6 +46,7 @@ use crate::conversation_history::ConversationHistory;
 use crate::error::CodexErr;
 use crate::error::Result as CodexResult;
 use crate::error::SandboxErr;
+use crate::error::get_error_message_ui;
 use crate::exec::ExecParams;
 use crate::exec::ExecToolCallOutput;
 use crate::exec::SandboxType;
@@ -468,6 +469,57 @@ impl Session {
             }
         }
     }
+    /// Runs the exec tool call and emits events for the begin and end of the
+    /// command even on error.
+    ///
+    /// Returns the output of the exec tool call.
+    async fn run_exec_with_events<'a>(
+        &self,
+        turn_diff_tracker: &mut TurnDiffTracker,
+        begin_ctx: ExecCommandContext,
+        exec_args: ExecInvokeArgs<'a>,
+    ) -> crate::error::Result<ExecToolCallOutput> {
+        let is_apply_patch = begin_ctx.apply_patch.is_some();
+        let sub_id = begin_ctx.sub_id.clone();
+        let call_id = begin_ctx.call_id.clone();
+
+        self.on_exec_command_begin(turn_diff_tracker, begin_ctx.clone())
+            .await;
+
+        let result = process_exec_tool_call(
+            exec_args.params,
+            exec_args.sandbox_type,
+            exec_args.ctrl_c,
+            exec_args.sandbox_policy,
+            exec_args.codex_linux_sandbox_exe,
+            exec_args.stdout_stream,
+        )
+        .await;
+
+        let output_stderr;
+        let borrowed: &ExecToolCallOutput = match &result {
+            Ok(output) => output,
+            Err(e) => {
+                output_stderr = ExecToolCallOutput {
+                    exit_code: -1,
+                    stdout: String::new(),
+                    stderr: get_error_message_ui(e),
+                    duration: Duration::default(),
+                };
+                &output_stderr
+            }
+        };
+        self.on_exec_command_end(
+            turn_diff_tracker,
+            &sub_id,
+            &call_id,
+            borrowed,
+            is_apply_patch,
+        )
+        .await;
+
+        result
+    }
 
     /// Helper that emits a BackgroundEvent with the given message. This keeps
     /// the callsites terse so adding more diagnostics does not clutter the
@@ -1717,6 +1769,15 @@ fn parse_container_exec_arguments(
     }
 }
 
+pub struct ExecInvokeArgs<'a> {
+    pub params: ExecParams,
+    pub sandbox_type: SandboxType,
+    pub ctrl_c: Arc<Notify>,
+    pub sandbox_policy: &'a SandboxPolicy,
+    pub codex_linux_sandbox_exe: &'a Option<PathBuf>,
+    pub stdout_stream: Option<StdoutStream>,
+}
+
 fn maybe_run_with_user_profile(params: ExecParams, sess: &Session) -> ExecParams {
     if sess.shell_environment_policy.use_profile {
         let command = sess
@@ -1887,23 +1948,26 @@ async fn handle_container_exec_with_params(
             },
         ),
     };
-    sess.on_exec_command_begin(turn_diff_tracker, exec_command_context.clone())
-        .await;
 
     let params = maybe_run_with_user_profile(params, sess);
-    let output_result = process_exec_tool_call(
-        params.clone(),
-        sandbox_type,
-        sess.ctrl_c.clone(),
-        &sess.sandbox_policy,
-        &sess.codex_linux_sandbox_exe,
-        Some(StdoutStream {
-            sub_id: sub_id.clone(),
-            call_id: call_id.clone(),
-            tx_event: sess.tx_event.clone(),
-        }),
-    )
-    .await;
+    let output_result = sess
+        .run_exec_with_events(
+            turn_diff_tracker,
+            exec_command_context.clone(),
+            ExecInvokeArgs {
+                params: params.clone(),
+                sandbox_type,
+                ctrl_c: sess.ctrl_c.clone(),
+                sandbox_policy: &sess.sandbox_policy,
+                codex_linux_sandbox_exe: &sess.codex_linux_sandbox_exe,
+                stdout_stream: Some(StdoutStream {
+                    sub_id: sub_id.clone(),
+                    call_id: call_id.clone(),
+                    tx_event: sess.tx_event.clone(),
+                }),
+            },
+        )
+        .await;
 
     match output_result {
         Ok(output) => {
@@ -1914,24 +1978,14 @@ async fn handle_container_exec_with_params(
                 duration,
             } = &output;
 
-            sess.on_exec_command_end(
-                turn_diff_tracker,
-                &sub_id,
-                &call_id,
-                &output,
-                exec_command_context.apply_patch.is_some(),
-            )
-            .await;
-
             let is_success = *exit_code == 0;
             let content = format_exec_output(
                 if is_success { stdout } else { stderr },
                 *exit_code,
                 *duration,
             );
-
             ResponseInputItem::FunctionCallOutput {
-                call_id,
+                call_id: call_id.clone(),
                 output: FunctionCallOutputPayload {
                     content,
                     success: Some(is_success),
@@ -1949,16 +2003,13 @@ async fn handle_container_exec_with_params(
             )
             .await
         }
-        Err(e) => {
-            // Handle non-sandbox errors
-            ResponseInputItem::FunctionCallOutput {
-                call_id,
-                output: FunctionCallOutputPayload {
-                    content: format!("execution error: {e}"),
-                    success: None,
-                },
-            }
-        }
+        Err(e) => ResponseInputItem::FunctionCallOutput {
+            call_id: call_id.clone(),
+            output: FunctionCallOutputPayload {
+                content: format!("execution error: {e}"),
+                success: None,
+            },
+        },
     }
 }
 
@@ -1973,7 +2024,6 @@ async fn handle_sandbox_error(
     let call_id = exec_command_context.call_id.clone();
     let sub_id = exec_command_context.sub_id.clone();
     let cwd = exec_command_context.cwd.clone();
-    let is_apply_patch = exec_command_context.apply_patch.is_some();
 
     // Early out if either the user never wants to be asked for approval, or
     // we're letting the model manage escalation requests. Otherwise, continue
@@ -2039,24 +2089,26 @@ async fn handle_sandbox_error(
             sess.notify_background_event(&sub_id, "retrying command without sandbox")
                 .await;
 
-            sess.on_exec_command_begin(turn_diff_tracker, exec_command_context)
-                .await;
-
             // This is an escalated retry; the policy will not be
             // examined and the sandbox has been set to `None`.
-            let retry_output_result = process_exec_tool_call(
-                params,
-                SandboxType::None,
-                sess.ctrl_c.clone(),
-                &sess.sandbox_policy,
-                &sess.codex_linux_sandbox_exe,
-                Some(StdoutStream {
-                    sub_id: sub_id.clone(),
-                    call_id: call_id.clone(),
-                    tx_event: sess.tx_event.clone(),
-                }),
-            )
-            .await;
+            let retry_output_result = sess
+                .run_exec_with_events(
+                    turn_diff_tracker,
+                    exec_command_context.clone(),
+                    ExecInvokeArgs {
+                        params,
+                        sandbox_type: SandboxType::None,
+                        ctrl_c: sess.ctrl_c.clone(),
+                        sandbox_policy: &sess.sandbox_policy,
+                        codex_linux_sandbox_exe: &sess.codex_linux_sandbox_exe,
+                        stdout_stream: Some(StdoutStream {
+                            sub_id: sub_id.clone(),
+                            call_id: call_id.clone(),
+                            tx_event: sess.tx_event.clone(),
+                        }),
+                    },
+                )
+                .await;
 
             match retry_output_result {
                 Ok(retry_output) => {
@@ -2067,15 +2119,6 @@ async fn handle_sandbox_error(
                         duration,
                     } = &retry_output;
 
-                    sess.on_exec_command_end(
-                        turn_diff_tracker,
-                        &sub_id,
-                        &call_id,
-                        &retry_output,
-                        is_apply_patch,
-                    )
-                    .await;
-
                     let is_success = *exit_code == 0;
                     let content = format_exec_output(
                         if is_success { stdout } else { stderr },
@@ -2084,23 +2127,20 @@ async fn handle_sandbox_error(
                     );
 
                     ResponseInputItem::FunctionCallOutput {
-                        call_id,
+                        call_id: call_id.clone(),
                         output: FunctionCallOutputPayload {
                             content,
                             success: Some(is_success),
                         },
                     }
                 }
-                Err(e) => {
-                    // Handle retry failure
-                    ResponseInputItem::FunctionCallOutput {
-                        call_id,
-                        output: FunctionCallOutputPayload {
-                            content: format!("retry failed: {e}"),
-                            success: None,
-                        },
-                    }
-                }
+                Err(e) => ResponseInputItem::FunctionCallOutput {
+                    call_id: call_id.clone(),
+                    output: FunctionCallOutputPayload {
+                        content: format!("retry failed: {e}"),
+                        success: None,
+                    },
+                },
             }
         }
         ReviewDecision::Denied | ReviewDecision::Abort => {
diff --git a/codex-rs/core/src/error.rs b/codex-rs/core/src/error.rs
index 9cdc4eb544..537f4a0361 100644
--- a/codex-rs/core/src/error.rs
+++ b/codex-rs/core/src/error.rs
@@ -132,3 +132,10 @@ impl CodexErr {
         (self as &dyn std::any::Any).downcast_ref::<T>()
     }
 }
+
+pub fn get_error_message_ui(e: &CodexErr) -> String {
+    match e {
+        CodexErr::Sandbox(SandboxErr::Denied(_, _, stderr)) => stderr.to_string(),
+        _ => e.to_string(),
+    }
+}

Review Comments

codex-rs/core/src/codex.rs

@@ -1717,6 +1766,15 @@ fn parse_container_exec_arguments(
     }
 }
 
+pub struct ExecInvokeArgs<'a> {
+    pub params: crate::exec::ExecParams,

I would make more of these top-level imports so this is a bit easier to read.

@@ -469,6 +470,54 @@ impl Session {
         }
     }
 
+    async fn run_exec_with_events<'a>(

This function returns Result, so can you add a docstring to comment that we guarantee that both on_exec_command_begin() and on_exec_command_end() are called, even if this function returns Err?

Also update to note that this is used both when the shell call is done originally as well as for the sandbox retry.