Files
codex/prs/bolinfest/PR-1610.md
2025-09-02 15:17:45 -07:00

18 KiB
Raw Blame History

PR #1610: Interrupt bug

Description

Interrupt is currently buggy. It uses the buffered deltas from the interrupted turn, so stale streamed tokens bleed into the next answer, and the follow-up request can fail with a 400 "No tool output found for function call" error because the aborted tool call was never answered.

Full Diff

diff --git a/codex-rs/core/src/codex.rs b/codex-rs/core/src/codex.rs
index 2c5baf152f..d42d056a76 100644
--- a/codex-rs/core/src/codex.rs
+++ b/codex-rs/core/src/codex.rs
@@ -990,6 +990,52 @@ async fn run_task(sess: Arc<Session>, sub_id: String, input: Vec<InputItem>) {
     sess.tx_event.send(event).await.ok();
 }
 
+// ---
+// Helpers --------------------------------------------------------------------
+//
+// When a turn is interrupted before Codex can deliver tool output(s) back to
+// the model, the next request can fail with a 400 from the OpenAI API:
+//   {"error": {"message": "No tool output found for function call call_XXXXX", ...}}
+// Historically this manifested as a confusing retry loop ("stream error: 400 …")
+// because we never learned about the missing `call_id` (the stream was aborted
+// before we observed the `ResponseEvent::OutputItemDone` that would have let us
+// record it in `pending_call_ids`).
+//
+// To make interruption robust we parse the error body for the offending call id
+// and add it to `pending_call_ids` so the very next retry can inject a synthetic
+// `FunctionCallOutput { content: "aborted" }` and satisfy the API contract.
+// -----------------------------------------------------------------------------
+fn extract_missing_tool_call_id(body: &str) -> Option<String> {
+    // Try to parse the canonical JSON error shape first.
+    if let Ok(v) = serde_json::from_str::<serde_json::Value>(body) {
+        if let Some(msg) = v
+            .get("error")
+            .and_then(|e| e.get("message"))
+            .and_then(|m| m.as_str())
+        {
+            if let Some(id) = extract_missing_tool_call_id_from_msg(msg) {
+                return Some(id);
+            }
+        }
+    }
+    // Fallback: scan the raw body.
+    extract_missing_tool_call_id_from_msg(body)
+}
+
+fn extract_missing_tool_call_id_from_msg(msg: &str) -> Option<String> {
+    const NEEDLE: &str = "No tool output found for function call";
+    let idx = msg.find(NEEDLE)?;
+    let rest = &msg[idx + NEEDLE.len()..];
+    // Find the beginning of the call id (typically starts with "call_").
+    let start = rest.find("call_")?;
+    let rest = &rest[start..];
+    // Capture valid id chars [A-Za-z0-9_-/]. Hyphen shows up in some IDs; be permissive.
+    let end = rest
+        .find(|c: char| !(c.is_ascii_alphanumeric() || c == '_' || c == '-' || c == '/'))
+        .unwrap_or(rest.len());
+    Some(rest[..end].to_string())
+}
+
 async fn run_turn(
     sess: &Session,
     sub_id: String,
@@ -1024,6 +1070,50 @@ async fn run_turn(
             Ok(output) => return Ok(output),
             Err(CodexErr::Interrupted) => return Err(CodexErr::Interrupted),
             Err(CodexErr::EnvVar(var)) => return Err(CodexErr::EnvVar(var)),
+            Err(CodexErr::UnexpectedStatus(status, body)) => {
+                // Detect the specific 400 "No tool output found for function call ..." error that
+                // occurs when a user interrupted before Codex could answer a tool call.
+                if status == reqwest::StatusCode::BAD_REQUEST {
+                    if let Some(call_id) = extract_missing_tool_call_id(&body) {
+                        {
+                            let mut state = sess.state.lock().unwrap();
+                            state.pending_call_ids.insert(call_id.clone());
+                        }
+                        // Surface a friendlier background event so users understand the recovery.
+                        sess
+                            .notify_background_event(
+                                &sub_id,
+                                format!(
+                                    "previous turn interrupted before responding to tool {call_id}; sending aborted output and retrying…",
+                                ),
+                            )
+                            .await;
+                        // Immediately retry the turn without consuming a provider stream retry budget.
+                        continue;
+                    }
+                }
+                // Fall through to generic retry path if we could not autorecover.
+                let e = CodexErr::UnexpectedStatus(status, body);
+                // Use the configured provider-specific stream retry budget.
+                let max_retries = sess.client.get_provider().stream_max_retries();
+                if retries < max_retries {
+                    retries += 1;
+                    let delay = backoff(retries);
+                    warn!(
+                        "stream disconnected - retrying turn ({retries}/{max_retries} in {delay:?})...",
+                    );
+                    sess.notify_background_event(
+                        &sub_id,
+                        format!(
+                            "stream error: {e}; retrying {retries}/{max_retries} in {delay:?}…",
+                        ),
+                    )
+                    .await;
+                    tokio::time::sleep(delay).await;
+                } else {
+                    return Err(e);
+                }
+            }
             Err(e) => {
                 // Use the configured provider-specific stream retry budget.
                 let max_retries = sess.client.get_provider().stream_max_retries();
@@ -1040,7 +1130,7 @@ async fn run_turn(
                     sess.notify_background_event(
                         &sub_id,
                         format!(
-                            "stream error: {e}; retrying {retries}/{max_retries} in {delay:?}…"
+                            "stream error: {e}; retrying {retries}/{max_retries} in {delay:?}…",
                         ),
                     )
                     .await;
diff --git a/codex-rs/tui/src/chatwidget.rs b/codex-rs/tui/src/chatwidget.rs
index 7c825acd41..0f72f417dc 100644
--- a/codex-rs/tui/src/chatwidget.rs
+++ b/codex-rs/tui/src/chatwidget.rs
@@ -53,6 +53,7 @@ pub(crate) struct ChatWidget<'a> {
     token_usage: TokenUsage,
     reasoning_buffer: String,
     answer_buffer: String,
+    active_task_id: Option<String>,
 }
 
 #[derive(Clone, Copy, Eq, PartialEq)]
@@ -141,6 +142,7 @@ impl ChatWidget<'_> {
             token_usage: TokenUsage::default(),
             reasoning_buffer: String::new(),
             answer_buffer: String::new(),
+            active_task_id: None,
         }
     }
 
@@ -222,10 +224,30 @@ impl ChatWidget<'_> {
             self.conversation_history.add_user_message(text);
         }
         self.conversation_history.scroll_to_bottom();
+
+        // IMPORTANT: Starting a *new* user turn. Clear any partially streamed
+        // answer from a previous turn (e.g., one that was interrupted) so that
+        // the next AgentMessageDelta spawns a fresh agent message cell instead
+        // of overwriting the last one.
+        self.answer_buffer.clear();
+        self.reasoning_buffer.clear();
     }
 
     pub(crate) fn handle_codex_event(&mut self, event: Event) {
-        let Event { id, msg } = event;
+        // Retain the event ID so we can refer to it after destructuring.
+        let event_id = event.id.clone();
+        let Event { id: _, msg } = event;
+
+        // When we are in the middle of a task (active_task_id is Some) we drop
+        // streaming text/reasoning events for *other* task IDs. This prevents
+        // late tokens from an interrupted run from bleeding into the current
+        // answer.
+        let should_drop_streaming = self
+            .active_task_id
+            .as_ref()
+            .map(|active| active != &event_id)
+            .unwrap_or(false);
+
         match msg {
             EventMsg::SessionConfigured(event) => {
                 // Record session information at the top of the conversation.
@@ -246,6 +268,9 @@ impl ChatWidget<'_> {
                 self.request_redraw();
             }
             EventMsg::AgentMessage(AgentMessageEvent { message }) => {
+                if should_drop_streaming {
+                    return;
+                }
                 // if the answer buffer is empty, this means we haven't received any
                 // delta. Thus, we need to print the message as a new answer.
                 if self.answer_buffer.is_empty() {
@@ -259,6 +284,9 @@ impl ChatWidget<'_> {
                 self.request_redraw();
             }
             EventMsg::AgentMessageDelta(AgentMessageDeltaEvent { delta }) => {
+                if should_drop_streaming {
+                    return;
+                }
                 if self.answer_buffer.is_empty() {
                     self.conversation_history
                         .add_agent_message(&self.config, "".to_string());
@@ -269,6 +297,9 @@ impl ChatWidget<'_> {
                 self.request_redraw();
             }
             EventMsg::AgentReasoningDelta(AgentReasoningDeltaEvent { delta }) => {
+                if should_drop_streaming {
+                    return;
+                }
                 if self.reasoning_buffer.is_empty() {
                     self.conversation_history
                         .add_agent_reasoning(&self.config, "".to_string());
@@ -279,6 +310,9 @@ impl ChatWidget<'_> {
                 self.request_redraw();
             }
             EventMsg::AgentReasoning(AgentReasoningEvent { text }) => {
+                if should_drop_streaming {
+                    return;
+                }
                 // if the reasoning buffer is empty, this means we haven't received any
                 // delta. Thus, we need to print the message as a new reasoning.
                 if self.reasoning_buffer.is_empty() {
@@ -293,6 +327,10 @@ impl ChatWidget<'_> {
                 self.request_redraw();
             }
             EventMsg::TaskStarted => {
+                // New task has begun — update state and clear any stale buffers.
+                self.active_task_id = Some(event_id);
+                self.answer_buffer.clear();
+                self.reasoning_buffer.clear();
                 self.bottom_pane.clear_ctrl_c_quit_hint();
                 self.bottom_pane.set_task_running(true);
                 self.request_redraw();
@@ -300,6 +338,10 @@ impl ChatWidget<'_> {
             EventMsg::TaskComplete(TaskCompleteEvent {
                 last_agent_message: _,
             }) => {
+                // Task finished; clear active_task_id so that subsequent events are processed.
+                if self.active_task_id.as_ref() == Some(&event_id) {
+                    self.active_task_id = None;
+                }
                 self.bottom_pane.set_task_running(false);
                 self.request_redraw();
             }
@@ -309,16 +351,25 @@ impl ChatWidget<'_> {
                     .set_token_usage(self.token_usage.clone(), self.config.model_context_window);
             }
             EventMsg::Error(ErrorEvent { message }) => {
+                // Error events always get surfaced (even for stale task IDs) so that the user sees
+                // why a run stopped. However, only clear the running indicator if this is the
+                // active task.
+                if self.active_task_id.as_ref() == Some(&event_id) {
+                    self.bottom_pane.set_task_running(false);
+                    self.active_task_id = None;
+                }
                 self.conversation_history.add_error(message);
-                self.bottom_pane.set_task_running(false);
             }
             EventMsg::ExecApprovalRequest(ExecApprovalRequestEvent {
                 command,
                 cwd,
                 reason,
             }) => {
+                if should_drop_streaming {
+                    return;
+                }
                 let request = ApprovalRequest::Exec {
-                    id,
+                    id: event_id,
                     command,
                     cwd,
                     reason,
@@ -330,6 +381,9 @@ impl ChatWidget<'_> {
                 reason,
                 grant_root,
             }) => {
+                if should_drop_streaming {
+                    return;
+                }
                 // ------------------------------------------------------------------
                 // Before we even prompt the user for approval we surface the patch
                 // summary in the main conversation so that the dialog appears in a
@@ -348,7 +402,7 @@ impl ChatWidget<'_> {
 
                 // Now surface the approval request in the BottomPane as before.
                 let request = ApprovalRequest::ApplyPatch {
-                    id,
+                    id: event_id,
                     reason,
                     grant_root,
                 };
@@ -360,6 +414,9 @@ impl ChatWidget<'_> {
                 command,
                 cwd: _,
             }) => {
+                if should_drop_streaming {
+                    return;
+                }
                 self.conversation_history
                     .add_active_exec_command(call_id, command);
                 self.request_redraw();
@@ -369,6 +426,9 @@ impl ChatWidget<'_> {
                 auto_approved,
                 changes,
             }) => {
+                if should_drop_streaming {
+                    return;
+                }
                 // Even when a patch is autoapproved we still display the
                 // summary so the user can follow along.
                 self.conversation_history
@@ -384,6 +444,9 @@ impl ChatWidget<'_> {
                 stdout,
                 stderr,
             }) => {
+                if should_drop_streaming {
+                    return;
+                }
                 self.conversation_history
                     .record_completed_exec_command(call_id, stdout, stderr, exit_code);
                 self.request_redraw();
@@ -394,11 +457,17 @@ impl ChatWidget<'_> {
                 tool,
                 arguments,
             }) => {
+                if should_drop_streaming {
+                    return;
+                }
                 self.conversation_history
                     .add_active_mcp_tool_call(call_id, server, tool, arguments);
                 self.request_redraw();
             }
             EventMsg::McpToolCallEnd(mcp_tool_call_end_event) => {
+                if should_drop_streaming {
+                    return;
+                }
                 let success = mcp_tool_call_end_event.is_success();
                 let McpToolCallEndEvent { call_id, result } = mcp_tool_call_end_event;
                 self.conversation_history

Review Comments

codex-rs/tui/src/chatwidget.rs

@@ -222,10 +224,30 @@ impl ChatWidget<'_> {
             self.conversation_history.add_user_message(text);
         }
         self.conversation_history.scroll_to_bottom();
+
+        // IMPORTANT: Starting a *new* user turn. Clear any partially streamed
+        // answer from a previous turn (e.g., one that was interrupted) so that
+        // the next AgentMessageDelta spawns a fresh agent message cell instead
+        // of overwriting the last one.
+        self.answer_buffer.clear();
+        self.reasoning_buffer.clear();
     }
 
     pub(crate) fn handle_codex_event(&mut self, event: Event) {
-        let Event { id, msg } = event;
+        // Retain the event ID so we can refer to it after destructuring.
+        let event_id = event.id.clone();
+        let Event { id: _, msg } = event;

Why did this change? Why not destructure as before without cloning? I would defer the clone() until it is necessary (i.e., you need to pass the id by value to another function).

If you just want to change the name:

let Event { id: event_id, msg } = event;
@@ -246,6 +268,9 @@ impl ChatWidget<'_> {
                 self.request_redraw();
             }
             EventMsg::AgentMessage(AgentMessageEvent { message }) => {
+                if should_drop_streaming {
+                    return;
+                }

Admittedly, this is my personal style, but I think it has merit: anytime you have an early return from a block of code, I would put a blank line after the closing } of the early return to help call attention to the fact that it is not straight-line code.

@@ -309,16 +351,25 @@ impl ChatWidget<'_> {
                     .set_token_usage(self.token_usage.clone(), self.config.model_context_window);
             }
             EventMsg::Error(ErrorEvent { message }) => {
+                // Error events always get surfaced (even for stale task IDs) so that the user sees
+                // why a run stopped. However, only clear the running indicator if this is the
+                // active task.
+                if self.active_task_id.as_ref() == Some(&event_id) {
+                    self.bottom_pane.set_task_running(false);
+                    self.active_task_id = None;
+                }
                 self.conversation_history.add_error(message);
-                self.bottom_pane.set_task_running(false);
             }
             EventMsg::ExecApprovalRequest(ExecApprovalRequestEvent {
                 command,
                 cwd,
                 reason,
             }) => {
+                if should_drop_streaming {

Are you sure we should drop in this case: don't we need to ensure this request is displayed to the user? Am I misunderstanding?

@@ -330,6 +381,9 @@ impl ChatWidget<'_> {
                 reason,
                 grant_root,
             }) => {
+                if should_drop_streaming {

Seeing this if in what feels like the majority of cases makes me wonder if there's a cleaner way to do this so we don't have to copy/paste this so much?