codex/prs/bolinfest/PR-1545.md

# PR #1545: Add CLI integration tests

- URL: https://github.com/openai/codex/pull/1545
- Author: aibrahim-oai
- Created: 2025-07-11 21:03:16 UTC
- Updated: 2025-07-17 16:38:35 UTC
- Changes: +236/-0, Files changed: 3, Commits: 12

## Description

## Summary
- add new integration tests for the Rust CLI
- test a basic single-turn response
- validate shell tool invocation flow
- update Cargo.lock for test dependencies

## Testing
- `cargo fmt --all`
- `cargo clippy -p codex-cli --tests --all-features -- -D warnings`
- `cargo test -p codex-cli --test integration -- --nocapture`


------
https://chatgpt.com/codex/tasks/task_i_68717125ff6083219bf892e0bdf14427

## Full Diff

```diff
diff --git a/codex-rs/Cargo.lock b/codex-rs/Cargo.lock
index e59dbfa255..2b16b82e48 100644
--- a/codex-rs/Cargo.lock
+++ b/codex-rs/Cargo.lock
@@ -617,6 +617,7 @@ name = "codex-cli"
 version = "0.0.0"
 dependencies = [
  "anyhow",
+ "assert_cmd",
  "clap",
  "clap_complete",
  "codex-chatgpt",
@@ -627,10 +628,14 @@ dependencies = [
  "codex-login",
  "codex-mcp-server",
  "codex-tui",
+ "indoc",
+ "predicates",
  "serde_json",
+ "tempfile",
  "tokio",
  "tracing",
  "tracing-subscriber",
+ "wiremock",
 ]

 [[package]]
diff --git a/codex-rs/cli/Cargo.toml b/codex-rs/cli/Cargo.toml
index 943788157b..9932e89caa 100644
--- a/codex-rs/cli/Cargo.toml
+++ b/codex-rs/cli/Cargo.toml
@@ -36,3 +36,11 @@ tokio = { version = "1", features = [
 ] }
 tracing = "0.1.41"
 tracing-subscriber = "0.3.19"
+
+[dev-dependencies]
+assert_cmd = "2"
+predicates = "3"
+tempfile = "3"
+wiremock = "0.6"
+tokio = { version = "1", features = ["macros", "rt-multi-thread"] }
+indoc = "2"
diff --git a/codex-rs/cli/tests/integration.rs b/codex-rs/cli/tests/integration.rs
new file mode 100644
index 0000000000..6054dbe3d3
--- /dev/null
+++ b/codex-rs/cli/tests/integration.rs
@@ -0,0 +1,223 @@
+#![allow(clippy::unwrap_used)]
+
+//! End-to-end integration tests for the `codex` CLI.
+//!
+//! These spin up a local [`wiremock`][] server to stand in for the MCP server
+//! and then run the real compiled `codex` binary against it. The goal is to
+//! verify the high-level request/response flow rather than the details of the
+//! individual async functions.
+//!
+//! [`wiremock`]: https://docs.rs/wiremock
+
+use codex_core::exec::CODEX_SANDBOX_NETWORK_DISABLED_ENV_VAR;
+use predicates::prelude::*;
+use std::fs;
+use std::path::Path;
+use tempfile::TempDir;
+use wiremock::Mock;
+use wiremock::MockServer;
+use wiremock::ResponseTemplate;
+use wiremock::matchers::method;
+use wiremock::matchers::path;
+
+// ----- tests -----
+
+/// Sends a single simple prompt and verifies that the streamed response is
+/// surfaced to the user. This exercises the most common "ask a question, get a
+/// textual answer" flow.
+#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
+async fn full_conversation_turn_integration() {
+    if std::env::var(CODEX_SANDBOX_NETWORK_DISABLED_ENV_VAR).is_ok() {
+        println!("Skipping test because network is disabled");
+        return;
+    }
+
+    let server = MockServer::start().await;
+    Mock::given(method("POST"))
+        .and(path("/v1/responses"))
+        .respond_with(
+            ResponseTemplate::new(200)
+                .insert_header("content-type", "text/event-stream")
+                .set_body_raw(sse_message("Hello, world."), "text/event-stream"),
+        )
+        .expect(1)
+        .mount(&server)
+        .await;
+
+    // Disable retries — the mock server will fail hard if we make an unexpected
+    // request, so retries only slow the test down.
+    unsafe {
+        std::env::set_var("OPENAI_REQUEST_MAX_RETRIES", "0");
+        std::env::set_var("OPENAI_STREAM_MAX_RETRIES", "0");
+    }
+
+    let codex_home = TempDir::new().unwrap();
+    let sandbox = TempDir::new().unwrap();
+    write_config(codex_home.path(), &server);
+
+    // Capture the agent's final message in a file so we can assert on it precisely.
+    let last_message_file = sandbox.path().join("last_message.txt");
+
+    let mut cmd = assert_cmd::Command::cargo_bin("codex").unwrap();
+    cmd.env("CODEX_HOME", codex_home.path())
+        .current_dir(sandbox.path())
+        .arg("exec")
+        .arg("--skip-git-repo-check")
+        .arg("--output-last-message")
+        .arg(&last_message_file)
+        .arg("Hello");
+
+    cmd.assert()
+        .success()
+        .stdout(predicate::str::contains("Hello, world."));
+
+    // Assert on the captured last message file (more robust than stdout formatting).
+    let last = fs::read_to_string(&last_message_file).unwrap();
+    let expected = "Hello, world.";
+    assert_eq!(last.trim(), expected);
+}
+
+/// Simulates a tool invocation (`shell`) followed by a second assistant message
+/// once the tool call completes.
+#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
+async fn tool_invocation_flow() {
+    if std::env::var(CODEX_SANDBOX_NETWORK_DISABLED_ENV_VAR).is_ok() {
+        println!("Skipping test because network is disabled");
+        return;
+    }
+
+    let server = MockServer::start().await;
+
+    // The first request returns a function-call item; the second returns the
+    // final assistant message. Use an atomic counter to serve them in order.
+    struct SeqResponder {
+        count: std::sync::atomic::AtomicUsize,
+    }
+    impl wiremock::Respond for SeqResponder {
+        fn respond(&self, _: &wiremock::Request) -> ResponseTemplate {
+            use std::sync::atomic::Ordering;
+            match self.count.fetch_add(1, Ordering::SeqCst) {
+                0 => ResponseTemplate::new(200)
+                    .insert_header("content-type", "text/event-stream")
+                    .set_body_raw(sse_function_call(), "text/event-stream"),
+                _ => ResponseTemplate::new(200)
+                    .insert_header("content-type", "text/event-stream")
+                    .set_body_raw(sse_final_after_call(), "text/event-stream"),
+            }
+        }
+    }
+
+    Mock::given(method("POST"))
+        .and(path("/v1/responses"))
+        .respond_with(SeqResponder {
+            count: std::sync::atomic::AtomicUsize::new(0),
+        })
+        .expect(2)
+        .mount(&server)
+        .await;
+
+    unsafe {
+        std::env::set_var("OPENAI_REQUEST_MAX_RETRIES", "0");
+        std::env::set_var("OPENAI_STREAM_MAX_RETRIES", "0");
+    }
+
+    let codex_home = TempDir::new().unwrap();
+    let sandbox = TempDir::new().unwrap();
+    write_config(codex_home.path(), &server);
+
+    // Capture final assistant message after tool invocation.
+    let last_message_file = sandbox.path().join("last_message.txt");
+
+    let mut cmd = assert_cmd::Command::cargo_bin("codex").unwrap();
+    cmd.env("CODEX_HOME", codex_home.path())
+        .current_dir(sandbox.path())
+        .arg("exec")
+        .arg("--skip-git-repo-check")
+        .arg("--output-last-message")
+        .arg(&last_message_file)
+        .arg("Run shell");
+
+    cmd.assert()
+        .success()
+        .stdout(predicate::str::contains("exec echo hi"))
+        .stdout(predicate::str::contains("hi"));
+
+    // Assert that the final assistant message (second response) was 'done'.
+    let last = fs::read_to_string(&last_message_file).unwrap();
+    let expected = "done";
+    assert_eq!(last.trim(), expected);
+}
+
+/// Write a minimal `config.toml` pointing the CLI at the mock server.
+fn write_config(codex_home: &Path, server: &MockServer) {
+    fs::write(
+        codex_home.join("config.toml"),
+        format!(
+            r#"
+model_provider = "mock"
+model = "test-model"
+
+[model_providers.mock]
+name = "mock"
+base_url = "{}/v1"
+env_key = "PATH"
+wire_api = "responses"
+"#,
+            server.uri()
+        ),
+    )
+    .unwrap();
+}
+
+/// Small helper to generate an SSE stream with a single assistant message.
+fn sse_message(text: &str) -> String {
+    const TEMPLATE: &str = r#"event: response.output_item.done
+data: {"type":"response.output_item.done","item":{"type":"message","role":"assistant","content":[{"type":"output_text","text":"TEXT_PLACEHOLDER"}]}}
+
+event: response.completed
+data: {"type":"response.completed","response":{"id":"resp1","output":[]}}
+
+
+"#;
+
+    TEMPLATE.replace("TEXT_PLACEHOLDER", text)
+}
+
+/// Helper to craft an SSE stream that returns a `function_call`.
+fn sse_function_call() -> String {
+    let call = serde_json::json!({
+        "type": "response.output_item.done",
+        "item": {
+            "type": "function_call",
+            "name": "shell",
+            "arguments": "{\"command\":[\"echo\",\"hi\"]}",
+            "call_id": "call1"
+        }
+    });
+    let completed = serde_json::json!({
+        "type": "response.completed",
+        "response": {"id": "resp1", "output": []}
+    });
+
+    format!(
+        "event: response.output_item.done\ndata: {call}\n\n\
+event: response.completed\ndata: {completed}\n\n\n"
+    )
+}
+
+/// SSE stream for the assistant's final message after the tool call returns.
+fn sse_final_after_call() -> String {
+    let msg = serde_json::json!({
+        "type": "response.output_item.done",
+        "item": {"type": "message", "role": "assistant", "content": [{"type": "output_text", "text": "done"}]}
+    });
+    let completed = serde_json::json!({
+        "type": "response.completed",
+        "response": {"id": "resp2", "output": []}
+    });
+
+    format!(
+        "event: response.output_item.done\ndata: {msg}\n\n\
+event: response.completed\ndata: {completed}\n\n\n"
+    )
+}
```

## Review Comments

### codex-rs/cli/tests/integration.rs

- Created: 2025-07-12 17:41:32 UTC | Link: https://github.com/openai/codex/pull/1545#discussion_r2202841506

```diff
@@ -0,0 +1,171 @@
+#![allow(clippy::unwrap_used)]
+
+use codex_core::exec::CODEX_SANDBOX_NETWORK_DISABLED_ENV_VAR;
+use predicates::prelude::*;
+use std::fs;
+use std::path::Path;
+use tempfile::TempDir;
+use wiremock::Mock;
+use wiremock::MockServer;
+use wiremock::ResponseTemplate;
+use wiremock::matchers::method;
+use wiremock::matchers::path;
+
+fn write_config(dir: &Path, server: &MockServer) {
```

> Similar to a comment I made on another PR, please list all of these helper functions below the tests. The tests are the most important thing in this file.

- Created: 2025-07-12 17:43:25 UTC | Link: https://github.com/openai/codex/pull/1545#discussion_r2202842748

```diff
@@ -0,0 +1,171 @@
+#![allow(clippy::unwrap_used)]
+
+use codex_core::exec::CODEX_SANDBOX_NETWORK_DISABLED_ENV_VAR;
+use predicates::prelude::*;
+use std::fs;
+use std::path::Path;
+use tempfile::TempDir;
+use wiremock::Mock;
+use wiremock::MockServer;
+use wiremock::ResponseTemplate;
+use wiremock::matchers::method;
+use wiremock::matchers::path;
+
+fn write_config(dir: &Path, server: &MockServer) {
+    fs::write(
+        dir.join("config.toml"),
+        format!(
+            r#"model_provider = "mock"
+model = "test-model"
+[model_providers.mock]
+name = "mock"
+base_url = "{}/v1"
+env_key = "PATH"
+wire_api = "responses"
+"#,
```

> Since the leading newline at the start of the content doesn't hurt anything, I would do this for readability:
>
> ```suggestion
>             r#"
> model_provider = "mock"
> model = "test-model"
> [model_providers.mock]
> name = "mock"
> base_url = "{}/v1"
> env_key = "PATH"
> wire_api = "responses"
> "#,
> ```
>
> You can also consider https://crates.io/crates/indoc if you feel strongly.

- Created: 2025-07-12 17:55:01 UTC | Link: https://github.com/openai/codex/pull/1545#discussion_r2202846863

```diff
@@ -0,0 +1,171 @@
+#![allow(clippy::unwrap_used)]
+
+use codex_core::exec::CODEX_SANDBOX_NETWORK_DISABLED_ENV_VAR;
+use predicates::prelude::*;
+use std::fs;
+use std::path::Path;
+use tempfile::TempDir;
+use wiremock::Mock;
+use wiremock::MockServer;
+use wiremock::ResponseTemplate;
+use wiremock::matchers::method;
+use wiremock::matchers::path;
+
+fn write_config(dir: &Path, server: &MockServer) {
+    fs::write(
+        dir.join("config.toml"),
+        format!(
+            r#"model_provider = "mock"
+model = "test-model"
+[model_providers.mock]
+name = "mock"
+base_url = "{}/v1"
+env_key = "PATH"
+wire_api = "responses"
+"#,
+            server.uri()
+        ),
+    )
+    .unwrap();
+}
+
+fn sse_message(text: &str) -> String {
+    format!(
+        "event: response.output_item.done\n\
+data: {{\"type\":\"response.output_item.done\",\"item\":{{\"type\":\"message\",\"role\":\"assistant\",\"content\":[{{\"type\":\"output_text\",\"text\":\"{text}\"}}]}}}}\n\n\
+event: response.completed\n\
+data: {{\"type\":\"response.completed\",\"response\":{{\"id\":\"resp1\",\"output\":[]}}}}\n\n\n"
+    )
```

> In this case, the escaping of `{` makes this so hard to read that I would consider using `replace()`:
>
> ```suggestion
>     let template = r#"event: response.output_item.done
> data: {"type":"response.output_item.done","item":{"type":"message","role":"assistant","content":[{"type":"output_text","text":"TEXT_PLACEHOLDER"}]}}
>
> event: response.completed
> data: {"type":"response.completed","response":{"id":"resp1","output":[]}}
>
>
> "#;
>     template.replace("TEXT_PLACEHOLDER", text);
> ```

- Created: 2025-07-12 17:59:03 UTC | Link: https://github.com/openai/codex/pull/1545#discussion_r2202848480

```diff
@@ -0,0 +1,171 @@
+#![allow(clippy::unwrap_used)]
+
+use codex_core::exec::CODEX_SANDBOX_NETWORK_DISABLED_ENV_VAR;
+use predicates::prelude::*;
+use std::fs;
+use std::path::Path;
+use tempfile::TempDir;
+use wiremock::Mock;
+use wiremock::MockServer;
+use wiremock::ResponseTemplate;
+use wiremock::matchers::method;
+use wiremock::matchers::path;
+
+fn write_config(dir: &Path, server: &MockServer) {
```

> Also, I would name the variable `codex_home` rather than `dir`.

- Created: 2025-07-12 18:04:37 UTC | Link: https://github.com/openai/codex/pull/1545#discussion_r2202854166

```diff
@@ -0,0 +1,171 @@
+#![allow(clippy::unwrap_used)]
+
+use codex_core::exec::CODEX_SANDBOX_NETWORK_DISABLED_ENV_VAR;
+use predicates::prelude::*;
+use std::fs;
+use std::path::Path;
+use tempfile::TempDir;
+use wiremock::Mock;
+use wiremock::MockServer;
+use wiremock::ResponseTemplate;
+use wiremock::matchers::method;
+use wiremock::matchers::path;
+
+fn write_config(dir: &Path, server: &MockServer) {
+    fs::write(
+        dir.join("config.toml"),
+        format!(
+            r#"model_provider = "mock"
+model = "test-model"
+[model_providers.mock]
+name = "mock"
+base_url = "{}/v1"
+env_key = "PATH"
+wire_api = "responses"
+"#,
+            server.uri()
+        ),
+    )
+    .unwrap();
+}
+
+fn sse_message(text: &str) -> String {
+    format!(
+        "event: response.output_item.done\n\
+data: {{\"type\":\"response.output_item.done\",\"item\":{{\"type\":\"message\",\"role\":\"assistant\",\"content\":[{{\"type\":\"output_text\",\"text\":\"{text}\"}}]}}}}\n\n\
+event: response.completed\n\
+data: {{\"type\":\"response.completed\",\"response\":{{\"id\":\"resp1\",\"output\":[]}}}}\n\n\n"
+    )
+}
+
+fn sse_function_call() -> String {
+    let call = serde_json::json!({
+        "type": "response.output_item.done",
+        "item": {
+            "type": "function_call",
+            "name": "shell",
+            "arguments": "{\"command\":[\"echo\",\"hi\"]}",
+            "call_id": "call1"
+        }
+    });
+    let completed = serde_json::json!({
+        "type": "response.completed",
+        "response": {"id": "resp1", "output": []}
+    });
+    format!(
+        "event: response.output_item.done\ndata: {call}\n\n\
+event: response.completed\ndata: {completed}\n\n\n"
+    )
+}
+
+fn sse_final_after_call() -> String {
+    let msg = serde_json::json!({
+        "type": "response.output_item.done",
+        "item": {"type": "message", "role": "assistant", "content": [{"type": "output_text", "text": "done"}]}
+    });
+    let completed = serde_json::json!({
+        "type": "response.completed",
+        "response": {"id": "resp2", "output": []}
+    });
+    format!(
+        "event: response.output_item.done\ndata: {msg}\n\n\
+event: response.completed\ndata: {completed}\n\n\n"
+    )
+}
+
+#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
+async fn full_conversation_turn_integration() {
+    if std::env::var(CODEX_SANDBOX_NETWORK_DISABLED_ENV_VAR).is_ok() {
+        println!("Skipping test because network is disabled");
+        return;
+    }
+
+    let server = MockServer::start().await;
+    let resp = ResponseTemplate::new(200)
+        .insert_header("content-type", "text/event-stream")
+        .set_body_raw(sse_message("Hello, world."), "text/event-stream");
+    Mock::given(method("POST"))
+        .and(path("/v1/responses"))
+        .respond_with(resp)
+        .expect(1)
+        .mount(&server)
+        .await;
+
+    unsafe {
+        std::env::set_var("OPENAI_REQUEST_MAX_RETRIES", "0");
+        std::env::set_var("OPENAI_STREAM_MAX_RETRIES", "0");
+    }
+
+    let home = TempDir::new().unwrap();
+    let sandbox = TempDir::new().unwrap();
+    write_config(home.path(), &server);
+
+    let mut cmd = assert_cmd::Command::cargo_bin("codex").unwrap();
+    cmd.env("CODEX_HOME", home.path());
+    cmd.current_dir(sandbox.path());
+    cmd.arg("exec").arg("--skip-git-repo-check").arg("Hello");
```

> I would also run this with `--output-last-message FILE` where `FILE` is some file in `sandbox`. Then you can do a more precise assertion on the final message since you can do it against `FILE` instead of stdout. (Same for the other test.)

- Created: 2025-07-12 18:06:39 UTC | Link: https://github.com/openai/codex/pull/1545#discussion_r2202854547

```diff
@@ -0,0 +1,171 @@
+#![allow(clippy::unwrap_used)]
+
+use codex_core::exec::CODEX_SANDBOX_NETWORK_DISABLED_ENV_VAR;
+use predicates::prelude::*;
+use std::fs;
+use std::path::Path;
+use tempfile::TempDir;
+use wiremock::Mock;
+use wiremock::MockServer;
+use wiremock::ResponseTemplate;
+use wiremock::matchers::method;
+use wiremock::matchers::path;
+
+fn write_config(dir: &Path, server: &MockServer) {
+    fs::write(
+        dir.join("config.toml"),
+        format!(
+            r#"model_provider = "mock"
+model = "test-model"
+[model_providers.mock]
+name = "mock"
+base_url = "{}/v1"
+env_key = "PATH"
+wire_api = "responses"
+"#,
+            server.uri()
+        ),
+    )
+    .unwrap();
+}
+
+fn sse_message(text: &str) -> String {
+    format!(
+        "event: response.output_item.done\n\
+data: {{\"type\":\"response.output_item.done\",\"item\":{{\"type\":\"message\",\"role\":\"assistant\",\"content\":[{{\"type\":\"output_text\",\"text\":\"{text}\"}}]}}}}\n\n\
+event: response.completed\n\
+data: {{\"type\":\"response.completed\",\"response\":{{\"id\":\"resp1\",\"output\":[]}}}}\n\n\n"
+    )
+}
+
+fn sse_function_call() -> String {
+    let call = serde_json::json!({
+        "type": "response.output_item.done",
+        "item": {
+            "type": "function_call",
+            "name": "shell",
+            "arguments": "{\"command\":[\"echo\",\"hi\"]}",
+            "call_id": "call1"
+        }
+    });
+    let completed = serde_json::json!({
+        "type": "response.completed",
+        "response": {"id": "resp1", "output": []}
+    });
+    format!(
+        "event: response.output_item.done\ndata: {call}\n\n\
+event: response.completed\ndata: {completed}\n\n\n"
+    )
+}
+
+fn sse_final_after_call() -> String {
+    let msg = serde_json::json!({
+        "type": "response.output_item.done",
+        "item": {"type": "message", "role": "assistant", "content": [{"type": "output_text", "text": "done"}]}
+    });
+    let completed = serde_json::json!({
+        "type": "response.completed",
+        "response": {"id": "resp2", "output": []}
+    });
+    format!(
+        "event: response.output_item.done\ndata: {msg}\n\n\
+event: response.completed\ndata: {completed}\n\n\n"
+    )
+}
+
+#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
```

> Could you add docstrings for this test and the other test? Admittedly, there is a lot of code required just to setup these tests, so it's not 100% obvious what is being tested. That is, this line seems to be the key bit that is producing the behavior that we are verifying at the end of the test:
>
> ```rust
> .set_body_raw(sse_message("Hello, world."), "text/event-stream")
> ```