Test passing

feature: Add !cmd outputs to model conversation history
Feature: Persist both the LocalShellCall metadata and the FunctionCallOutput from !cmd executions so the model sees each command invocation and its stdout in the conversation history. Flow: UserShellCommandTask builds a shell ToolCall, lets the runtime run it, then records a LocalShellCall followed by the resulting FunctionCallOutput via Session::record_conversation_items so both items land in the session history. Abort/Ctrl-C: If the runtime returns "aborted" (the Ctrl-C path) or handle_tool_call errors, we mark the LocalShellCall as Incomplete and emit a FunctionCallOutput with success=false so downstream consumers treat it as a failed or interrupted command.],workdir:/Users/abhishek/code/codex}
2026-04-22 23:41:46 +03:00 · 2025-11-02 01:09:06 -07:00 · 2025-11-02 00:58:03 -07:00
2 changed files with 139 additions and 3 deletions
--- a/codex-rs/core/src/codex.rs
+++ b/codex-rs/core/src/codex.rs
@@ -2834,6 +2834,68 @@ mod tests {
        }
    }

+    #[tokio::test(flavor = "multi_thread", worker_threads = 2)]
+    async fn user_shell_command_records_history_pair() {
+        use codex_protocol::models::ResponseItem;
+        use std::time::Duration;
+
+        let (sess, _tc, rx) = make_session_and_context_with_rx();
+
+        // Kick off a simple shell command via the helper.
+        let mut previous_context: Option<Arc<TurnContext>> = None;
+        crate::codex::handlers::run_user_shell_command(
+            &sess,
+            "sub-user-shell".to_string(),
+            "echo hello-codex".to_string(),
+            &mut previous_context,
+        )
+        .await;
+
+        // Wait until we observe ExecCommandEnd to ensure command finished and history appended.
+        let mut saw_exec_end = false;
+        for _ in 0..50 {
+            match tokio::time::timeout(Duration::from_millis(200), rx.recv()).await {
+                Ok(Ok(event)) => match event.msg {
+                    EventMsg::ExecCommandEnd(_) => {
+                        saw_exec_end = true;
+                        break;
+                    }
+                    _ => {}
+                },
+                _ => {}
+            }
+        }
+        assert!(saw_exec_end, "did not observe ExecCommandEnd event");
+
+        // Inspect conversation history for the paired items (LocalShellCall -> FunctionCallOutput).
+        // Recording happens after ExecCommandEnd, so allow a brief window for the history write.
+        let mut found_pair = false;
+        let mut last_history_snapshot = Vec::new();
+        for _ in 0..50 {
+            let history = sess.clone_history().await.get_history();
+            last_history_snapshot = history.clone();
+            let mut seen = false;
+            for idx in (0..history.len()).rev() {
+                if let ResponseItem::LocalShellCall { call_id: Some(cid), .. } = &history[idx] {
+                    if let Some(next) = history.get(idx + 1) {
+                        if let ResponseItem::FunctionCallOutput { call_id, output } = next {
+                            if call_id == cid && output.content.contains("hello-codex") {
+                                seen = true;
+                                break;
+                            }
+                        }
+                    }
+                }
+            }
+            if seen {
+                found_pair = true;
+                break;
+            }
+            tokio::time::sleep(std::time::Duration::from_millis(20)).await;
+        }
+        assert!(found_pair, "expected LocalShellCall followed by FunctionCallOutput with matching call_id and output containing 'hello-codex'\nFull history: {:?}", last_history_snapshot);
+    }
+
    fn sample_rollout(
        session: &Session,
        turn_context: &TurnContext,
--- a/codex-rs/core/src/tasks/user_shell.rs
+++ b/codex-rs/core/src/tasks/user_shell.rs
@@ -1,6 +1,12 @@
 use std::sync::Arc;

 use async_trait::async_trait;
+use codex_protocol::models::FunctionCallOutputPayload;
+use codex_protocol::models::LocalShellAction;
+use codex_protocol::models::LocalShellExecAction;
+use codex_protocol::models::LocalShellStatus;
+use codex_protocol::models::ResponseInputItem;
+use codex_protocol::models::ResponseItem;
 use codex_protocol::models::ShellToolCallParams;
 use codex_protocol::user_input::UserInput;
 use tokio::sync::Mutex;
@@ -79,13 +85,14 @@ impl SessionTask for UserShellCommandTask {
        };

        let params = ShellToolCallParams {
-            command: shell_invocation,
+            command: shell_invocation.clone(),
            workdir: None,
            timeout_ms: None,
            with_escalated_permissions: None,
            justification: None,
        };

+        let params_timeout_ms = params.timeout_ms.clone();
        let tool_call = ToolCall {
            tool_name: USER_SHELL_TOOL_NAME.to_string(),
            call_id: Uuid::new_v4().to_string(),
@@ -101,11 +108,78 @@ impl SessionTask for UserShellCommandTask {
            Arc::clone(&tracker),
        );

-        if let Err(err) = runtime
+        let call_id = tool_call.call_id.clone();
+        match runtime
            .handle_tool_call(tool_call, cancellation_token)
            .await
        {
-            error!("user shell command failed: {err:?}");
+            Ok(resp) => {
+                let mut status = LocalShellStatus::Completed;
+
+                // Special-case 'aborted' commands to mark failure.
+                let mut output_item: ResponseItem = resp.clone().into();
+                if let ResponseInputItem::FunctionCallOutput { output, .. } = &resp {
+                    if output.content == "aborted" {
+                        status = LocalShellStatus::Incomplete;
+                        output_item = ResponseItem::FunctionCallOutput {
+                            call_id: call_id.clone(),
+                            output: FunctionCallOutputPayload {
+                                content: "aborted".to_string(),
+                                success: Some(false),
+                                ..Default::default()
+                            },
+                        };
+                    }
+                }
+
+                let local_call = ResponseItem::LocalShellCall {
+                    id: None,
+                    call_id: Some(call_id.clone()),
+                    status,
+                    action: LocalShellAction::Exec(LocalShellExecAction {
+                        command: shell_invocation.clone(),
+                        timeout_ms: params_timeout_ms,
+                        working_directory: Some(turn_context.cwd.to_string_lossy().into_owned()),
+                        // The Responses API expects `env` to be an object with string keys/values.
+                        // Sending `null` here yields an `invalid_type` error, so we send `{}`.
+                        env: Some(std::collections::HashMap::new()),
+                        user: None,
+                    }),
+                };
+
+                session
+                    .record_conversation_items(turn_context.as_ref(), &[local_call, output_item])
+                    .await;
+            }
+            Err(err) => {
+                error!("user shell command failed: {err:?}");
+
+                let local_call = ResponseItem::LocalShellCall {
+                    id: None,
+                    call_id: Some(call_id.clone()),
+                    status: LocalShellStatus::Incomplete,
+                    action: LocalShellAction::Exec(LocalShellExecAction {
+                        // Clone because the original invocation was moved into `params` above.
+                        command: shell_invocation.clone(),
+                        timeout_ms: params_timeout_ms,
+                        working_directory: Some(turn_context.cwd.to_string_lossy().into_owned()),
+                        // Match success path: ensure an empty object instead of `null`.
+                        env: Some(std::collections::HashMap::new()),
+                        user: None,
+                    }),
+                };
+                let output_item = ResponseItem::FunctionCallOutput {
+                    call_id: call_id.clone(),
+                    output: FunctionCallOutputPayload {
+                        content: err.to_string(),
+                        success: Some(false),
+                        ..Default::default()
+                    },
+                };
+                session
+                    .record_conversation_items(turn_context.as_ref(), &[local_call, output_item])
+                    .await;
+            }
        }
        None
    }