# PR #1545: Add CLI integration tests - URL: https://github.com/openai/codex/pull/1545 - Author: aibrahim-oai - Created: 2025-07-11 21:03:16 UTC - Updated: 2025-07-17 16:38:35 UTC - Changes: +236/-0, Files changed: 3, Commits: 12 ## Description ## Summary - add new integration tests for the Rust CLI - test a basic single-turn response - validate shell tool invocation flow - update Cargo.lock for test dependencies ## Testing - `cargo fmt --all` - `cargo clippy -p codex-cli --tests --all-features -- -D warnings` - `cargo test -p codex-cli --test integration -- --nocapture` ------ https://chatgpt.com/codex/tasks/task_i_68717125ff6083219bf892e0bdf14427 ## Full Diff ```diff diff --git a/codex-rs/Cargo.lock b/codex-rs/Cargo.lock index e59dbfa255..2b16b82e48 100644 --- a/codex-rs/Cargo.lock +++ b/codex-rs/Cargo.lock @@ -617,6 +617,7 @@ name = "codex-cli" version = "0.0.0" dependencies = [ "anyhow", + "assert_cmd", "clap", "clap_complete", "codex-chatgpt", @@ -627,10 +628,14 @@ dependencies = [ "codex-login", "codex-mcp-server", "codex-tui", + "indoc", + "predicates", "serde_json", + "tempfile", "tokio", "tracing", "tracing-subscriber", + "wiremock", ] [[package]] diff --git a/codex-rs/cli/Cargo.toml b/codex-rs/cli/Cargo.toml index 943788157b..9932e89caa 100644 --- a/codex-rs/cli/Cargo.toml +++ b/codex-rs/cli/Cargo.toml @@ -36,3 +36,11 @@ tokio = { version = "1", features = [ ] } tracing = "0.1.41" tracing-subscriber = "0.3.19" + +[dev-dependencies] +assert_cmd = "2" +predicates = "3" +tempfile = "3" +wiremock = "0.6" +tokio = { version = "1", features = ["macros", "rt-multi-thread"] } +indoc = "2" diff --git a/codex-rs/cli/tests/integration.rs b/codex-rs/cli/tests/integration.rs new file mode 100644 index 0000000000..6054dbe3d3 --- /dev/null +++ b/codex-rs/cli/tests/integration.rs @@ -0,0 +1,223 @@ +#![allow(clippy::unwrap_used)] + +//! End-to-end integration tests for the `codex` CLI. +//! +//! These spin up a local [`wiremock`][] server to stand in for the MCP server +//! and then run the real compiled `codex` binary against it. The goal is to +//! verify the high-level request/response flow rather than the details of the +//! individual async functions. +//! +//! [`wiremock`]: https://docs.rs/wiremock + +use codex_core::exec::CODEX_SANDBOX_NETWORK_DISABLED_ENV_VAR; +use predicates::prelude::*; +use std::fs; +use std::path::Path; +use tempfile::TempDir; +use wiremock::Mock; +use wiremock::MockServer; +use wiremock::ResponseTemplate; +use wiremock::matchers::method; +use wiremock::matchers::path; + +// ----- tests ----- + +/// Sends a single simple prompt and verifies that the streamed response is +/// surfaced to the user. This exercises the most common "ask a question, get a +/// textual answer" flow. +#[tokio::test(flavor = "multi_thread", worker_threads = 2)] +async fn full_conversation_turn_integration() { + if std::env::var(CODEX_SANDBOX_NETWORK_DISABLED_ENV_VAR).is_ok() { + println!("Skipping test because network is disabled"); + return; + } + + let server = MockServer::start().await; + Mock::given(method("POST")) + .and(path("/v1/responses")) + .respond_with( + ResponseTemplate::new(200) + .insert_header("content-type", "text/event-stream") + .set_body_raw(sse_message("Hello, world."), "text/event-stream"), + ) + .expect(1) + .mount(&server) + .await; + + // Disable retries — the mock server will fail hard if we make an unexpected + // request, so retries only slow the test down. + unsafe { + std::env::set_var("OPENAI_REQUEST_MAX_RETRIES", "0"); + std::env::set_var("OPENAI_STREAM_MAX_RETRIES", "0"); + } + + let codex_home = TempDir::new().unwrap(); + let sandbox = TempDir::new().unwrap(); + write_config(codex_home.path(), &server); + + // Capture the agent's final message in a file so we can assert on it precisely. + let last_message_file = sandbox.path().join("last_message.txt"); + + let mut cmd = assert_cmd::Command::cargo_bin("codex").unwrap(); + cmd.env("CODEX_HOME", codex_home.path()) + .current_dir(sandbox.path()) + .arg("exec") + .arg("--skip-git-repo-check") + .arg("--output-last-message") + .arg(&last_message_file) + .arg("Hello"); + + cmd.assert() + .success() + .stdout(predicate::str::contains("Hello, world.")); + + // Assert on the captured last message file (more robust than stdout formatting). + let last = fs::read_to_string(&last_message_file).unwrap(); + let expected = "Hello, world."; + assert_eq!(last.trim(), expected); +} + +/// Simulates a tool invocation (`shell`) followed by a second assistant message +/// once the tool call completes. +#[tokio::test(flavor = "multi_thread", worker_threads = 2)] +async fn tool_invocation_flow() { + if std::env::var(CODEX_SANDBOX_NETWORK_DISABLED_ENV_VAR).is_ok() { + println!("Skipping test because network is disabled"); + return; + } + + let server = MockServer::start().await; + + // The first request returns a function-call item; the second returns the + // final assistant message. Use an atomic counter to serve them in order. + struct SeqResponder { + count: std::sync::atomic::AtomicUsize, + } + impl wiremock::Respond for SeqResponder { + fn respond(&self, _: &wiremock::Request) -> ResponseTemplate { + use std::sync::atomic::Ordering; + match self.count.fetch_add(1, Ordering::SeqCst) { + 0 => ResponseTemplate::new(200) + .insert_header("content-type", "text/event-stream") + .set_body_raw(sse_function_call(), "text/event-stream"), + _ => ResponseTemplate::new(200) + .insert_header("content-type", "text/event-stream") + .set_body_raw(sse_final_after_call(), "text/event-stream"), + } + } + } + + Mock::given(method("POST")) + .and(path("/v1/responses")) + .respond_with(SeqResponder { + count: std::sync::atomic::AtomicUsize::new(0), + }) + .expect(2) + .mount(&server) + .await; + + unsafe { + std::env::set_var("OPENAI_REQUEST_MAX_RETRIES", "0"); + std::env::set_var("OPENAI_STREAM_MAX_RETRIES", "0"); + } + + let codex_home = TempDir::new().unwrap(); + let sandbox = TempDir::new().unwrap(); + write_config(codex_home.path(), &server); + + // Capture final assistant message after tool invocation. + let last_message_file = sandbox.path().join("last_message.txt"); + + let mut cmd = assert_cmd::Command::cargo_bin("codex").unwrap(); + cmd.env("CODEX_HOME", codex_home.path()) + .current_dir(sandbox.path()) + .arg("exec") + .arg("--skip-git-repo-check") + .arg("--output-last-message") + .arg(&last_message_file) + .arg("Run shell"); + + cmd.assert() + .success() + .stdout(predicate::str::contains("exec echo hi")) + .stdout(predicate::str::contains("hi")); + + // Assert that the final assistant message (second response) was 'done'. + let last = fs::read_to_string(&last_message_file).unwrap(); + let expected = "done"; + assert_eq!(last.trim(), expected); +} + +/// Write a minimal `config.toml` pointing the CLI at the mock server. +fn write_config(codex_home: &Path, server: &MockServer) { + fs::write( + codex_home.join("config.toml"), + format!( + r#" +model_provider = "mock" +model = "test-model" + +[model_providers.mock] +name = "mock" +base_url = "{}/v1" +env_key = "PATH" +wire_api = "responses" +"#, + server.uri() + ), + ) + .unwrap(); +} + +/// Small helper to generate an SSE stream with a single assistant message. +fn sse_message(text: &str) -> String { + const TEMPLATE: &str = r#"event: response.output_item.done +data: {"type":"response.output_item.done","item":{"type":"message","role":"assistant","content":[{"type":"output_text","text":"TEXT_PLACEHOLDER"}]}} + +event: response.completed +data: {"type":"response.completed","response":{"id":"resp1","output":[]}} + + +"#; + + TEMPLATE.replace("TEXT_PLACEHOLDER", text) +} + +/// Helper to craft an SSE stream that returns a `function_call`. +fn sse_function_call() -> String { + let call = serde_json::json!({ + "type": "response.output_item.done", + "item": { + "type": "function_call", + "name": "shell", + "arguments": "{\"command\":[\"echo\",\"hi\"]}", + "call_id": "call1" + } + }); + let completed = serde_json::json!({ + "type": "response.completed", + "response": {"id": "resp1", "output": []} + }); + + format!( + "event: response.output_item.done\ndata: {call}\n\n\ +event: response.completed\ndata: {completed}\n\n\n" + ) +} + +/// SSE stream for the assistant's final message after the tool call returns. +fn sse_final_after_call() -> String { + let msg = serde_json::json!({ + "type": "response.output_item.done", + "item": {"type": "message", "role": "assistant", "content": [{"type": "output_text", "text": "done"}]} + }); + let completed = serde_json::json!({ + "type": "response.completed", + "response": {"id": "resp2", "output": []} + }); + + format!( + "event: response.output_item.done\ndata: {msg}\n\n\ +event: response.completed\ndata: {completed}\n\n\n" + ) +} ``` ## Review Comments ### codex-rs/cli/tests/integration.rs - Created: 2025-07-12 17:41:32 UTC | Link: https://github.com/openai/codex/pull/1545#discussion_r2202841506 ```diff @@ -0,0 +1,171 @@ +#![allow(clippy::unwrap_used)] + +use codex_core::exec::CODEX_SANDBOX_NETWORK_DISABLED_ENV_VAR; +use predicates::prelude::*; +use std::fs; +use std::path::Path; +use tempfile::TempDir; +use wiremock::Mock; +use wiremock::MockServer; +use wiremock::ResponseTemplate; +use wiremock::matchers::method; +use wiremock::matchers::path; + +fn write_config(dir: &Path, server: &MockServer) { ``` > Similar to a comment I made on another PR, please list all of these helper functions below the tests. The tests are the most important thing in this file. - Created: 2025-07-12 17:43:25 UTC | Link: https://github.com/openai/codex/pull/1545#discussion_r2202842748 ```diff @@ -0,0 +1,171 @@ +#![allow(clippy::unwrap_used)] + +use codex_core::exec::CODEX_SANDBOX_NETWORK_DISABLED_ENV_VAR; +use predicates::prelude::*; +use std::fs; +use std::path::Path; +use tempfile::TempDir; +use wiremock::Mock; +use wiremock::MockServer; +use wiremock::ResponseTemplate; +use wiremock::matchers::method; +use wiremock::matchers::path; + +fn write_config(dir: &Path, server: &MockServer) { + fs::write( + dir.join("config.toml"), + format!( + r#"model_provider = "mock" +model = "test-model" +[model_providers.mock] +name = "mock" +base_url = "{}/v1" +env_key = "PATH" +wire_api = "responses" +"#, ``` > Since the leading newline at the start of the content doesn't hurt anything, I would do this for readability: > > ```suggestion > r#" > model_provider = "mock" > model = "test-model" > [model_providers.mock] > name = "mock" > base_url = "{}/v1" > env_key = "PATH" > wire_api = "responses" > "#, > ``` > > You can also consider https://crates.io/crates/indoc if you feel strongly. - Created: 2025-07-12 17:55:01 UTC | Link: https://github.com/openai/codex/pull/1545#discussion_r2202846863 ```diff @@ -0,0 +1,171 @@ +#![allow(clippy::unwrap_used)] + +use codex_core::exec::CODEX_SANDBOX_NETWORK_DISABLED_ENV_VAR; +use predicates::prelude::*; +use std::fs; +use std::path::Path; +use tempfile::TempDir; +use wiremock::Mock; +use wiremock::MockServer; +use wiremock::ResponseTemplate; +use wiremock::matchers::method; +use wiremock::matchers::path; + +fn write_config(dir: &Path, server: &MockServer) { + fs::write( + dir.join("config.toml"), + format!( + r#"model_provider = "mock" +model = "test-model" +[model_providers.mock] +name = "mock" +base_url = "{}/v1" +env_key = "PATH" +wire_api = "responses" +"#, + server.uri() + ), + ) + .unwrap(); +} + +fn sse_message(text: &str) -> String { + format!( + "event: response.output_item.done\n\ +data: {{\"type\":\"response.output_item.done\",\"item\":{{\"type\":\"message\",\"role\":\"assistant\",\"content\":[{{\"type\":\"output_text\",\"text\":\"{text}\"}}]}}}}\n\n\ +event: response.completed\n\ +data: {{\"type\":\"response.completed\",\"response\":{{\"id\":\"resp1\",\"output\":[]}}}}\n\n\n" + ) ``` > In this case, the escaping of `{` makes this so hard to read that I would consider using `replace()`: > > ```suggestion > let template = r#"event: response.output_item.done > data: {"type":"response.output_item.done","item":{"type":"message","role":"assistant","content":[{"type":"output_text","text":"TEXT_PLACEHOLDER"}]}} > > event: response.completed > data: {"type":"response.completed","response":{"id":"resp1","output":[]}} > > > "#; > template.replace("TEXT_PLACEHOLDER", text); > ``` - Created: 2025-07-12 17:59:03 UTC | Link: https://github.com/openai/codex/pull/1545#discussion_r2202848480 ```diff @@ -0,0 +1,171 @@ +#![allow(clippy::unwrap_used)] + +use codex_core::exec::CODEX_SANDBOX_NETWORK_DISABLED_ENV_VAR; +use predicates::prelude::*; +use std::fs; +use std::path::Path; +use tempfile::TempDir; +use wiremock::Mock; +use wiremock::MockServer; +use wiremock::ResponseTemplate; +use wiremock::matchers::method; +use wiremock::matchers::path; + +fn write_config(dir: &Path, server: &MockServer) { ``` > Also, I would name the variable `codex_home` rather than `dir`. - Created: 2025-07-12 18:04:37 UTC | Link: https://github.com/openai/codex/pull/1545#discussion_r2202854166 ```diff @@ -0,0 +1,171 @@ +#![allow(clippy::unwrap_used)] + +use codex_core::exec::CODEX_SANDBOX_NETWORK_DISABLED_ENV_VAR; +use predicates::prelude::*; +use std::fs; +use std::path::Path; +use tempfile::TempDir; +use wiremock::Mock; +use wiremock::MockServer; +use wiremock::ResponseTemplate; +use wiremock::matchers::method; +use wiremock::matchers::path; + +fn write_config(dir: &Path, server: &MockServer) { + fs::write( + dir.join("config.toml"), + format!( + r#"model_provider = "mock" +model = "test-model" +[model_providers.mock] +name = "mock" +base_url = "{}/v1" +env_key = "PATH" +wire_api = "responses" +"#, + server.uri() + ), + ) + .unwrap(); +} + +fn sse_message(text: &str) -> String { + format!( + "event: response.output_item.done\n\ +data: {{\"type\":\"response.output_item.done\",\"item\":{{\"type\":\"message\",\"role\":\"assistant\",\"content\":[{{\"type\":\"output_text\",\"text\":\"{text}\"}}]}}}}\n\n\ +event: response.completed\n\ +data: {{\"type\":\"response.completed\",\"response\":{{\"id\":\"resp1\",\"output\":[]}}}}\n\n\n" + ) +} + +fn sse_function_call() -> String { + let call = serde_json::json!({ + "type": "response.output_item.done", + "item": { + "type": "function_call", + "name": "shell", + "arguments": "{\"command\":[\"echo\",\"hi\"]}", + "call_id": "call1" + } + }); + let completed = serde_json::json!({ + "type": "response.completed", + "response": {"id": "resp1", "output": []} + }); + format!( + "event: response.output_item.done\ndata: {call}\n\n\ +event: response.completed\ndata: {completed}\n\n\n" + ) +} + +fn sse_final_after_call() -> String { + let msg = serde_json::json!({ + "type": "response.output_item.done", + "item": {"type": "message", "role": "assistant", "content": [{"type": "output_text", "text": "done"}]} + }); + let completed = serde_json::json!({ + "type": "response.completed", + "response": {"id": "resp2", "output": []} + }); + format!( + "event: response.output_item.done\ndata: {msg}\n\n\ +event: response.completed\ndata: {completed}\n\n\n" + ) +} + +#[tokio::test(flavor = "multi_thread", worker_threads = 2)] +async fn full_conversation_turn_integration() { + if std::env::var(CODEX_SANDBOX_NETWORK_DISABLED_ENV_VAR).is_ok() { + println!("Skipping test because network is disabled"); + return; + } + + let server = MockServer::start().await; + let resp = ResponseTemplate::new(200) + .insert_header("content-type", "text/event-stream") + .set_body_raw(sse_message("Hello, world."), "text/event-stream"); + Mock::given(method("POST")) + .and(path("/v1/responses")) + .respond_with(resp) + .expect(1) + .mount(&server) + .await; + + unsafe { + std::env::set_var("OPENAI_REQUEST_MAX_RETRIES", "0"); + std::env::set_var("OPENAI_STREAM_MAX_RETRIES", "0"); + } + + let home = TempDir::new().unwrap(); + let sandbox = TempDir::new().unwrap(); + write_config(home.path(), &server); + + let mut cmd = assert_cmd::Command::cargo_bin("codex").unwrap(); + cmd.env("CODEX_HOME", home.path()); + cmd.current_dir(sandbox.path()); + cmd.arg("exec").arg("--skip-git-repo-check").arg("Hello"); ``` > I would also run this with `--output-last-message FILE` where `FILE` is some file in `sandbox`. Then you can do a more precise assertion on the final message since you can do it against `FILE` instead of stdout. (Same for the other test.) - Created: 2025-07-12 18:06:39 UTC | Link: https://github.com/openai/codex/pull/1545#discussion_r2202854547 ```diff @@ -0,0 +1,171 @@ +#![allow(clippy::unwrap_used)] + +use codex_core::exec::CODEX_SANDBOX_NETWORK_DISABLED_ENV_VAR; +use predicates::prelude::*; +use std::fs; +use std::path::Path; +use tempfile::TempDir; +use wiremock::Mock; +use wiremock::MockServer; +use wiremock::ResponseTemplate; +use wiremock::matchers::method; +use wiremock::matchers::path; + +fn write_config(dir: &Path, server: &MockServer) { + fs::write( + dir.join("config.toml"), + format!( + r#"model_provider = "mock" +model = "test-model" +[model_providers.mock] +name = "mock" +base_url = "{}/v1" +env_key = "PATH" +wire_api = "responses" +"#, + server.uri() + ), + ) + .unwrap(); +} + +fn sse_message(text: &str) -> String { + format!( + "event: response.output_item.done\n\ +data: {{\"type\":\"response.output_item.done\",\"item\":{{\"type\":\"message\",\"role\":\"assistant\",\"content\":[{{\"type\":\"output_text\",\"text\":\"{text}\"}}]}}}}\n\n\ +event: response.completed\n\ +data: {{\"type\":\"response.completed\",\"response\":{{\"id\":\"resp1\",\"output\":[]}}}}\n\n\n" + ) +} + +fn sse_function_call() -> String { + let call = serde_json::json!({ + "type": "response.output_item.done", + "item": { + "type": "function_call", + "name": "shell", + "arguments": "{\"command\":[\"echo\",\"hi\"]}", + "call_id": "call1" + } + }); + let completed = serde_json::json!({ + "type": "response.completed", + "response": {"id": "resp1", "output": []} + }); + format!( + "event: response.output_item.done\ndata: {call}\n\n\ +event: response.completed\ndata: {completed}\n\n\n" + ) +} + +fn sse_final_after_call() -> String { + let msg = serde_json::json!({ + "type": "response.output_item.done", + "item": {"type": "message", "role": "assistant", "content": [{"type": "output_text", "text": "done"}]} + }); + let completed = serde_json::json!({ + "type": "response.completed", + "response": {"id": "resp2", "output": []} + }); + format!( + "event: response.output_item.done\ndata: {msg}\n\n\ +event: response.completed\ndata: {completed}\n\n\n" + ) +} + +#[tokio::test(flavor = "multi_thread", worker_threads = 2)] ``` > Could you add docstrings for this test and the other test? Admittedly, there is a lot of code required just to setup these tests, so it's not 100% obvious what is being tested. That is, this line seems to be the key bit that is producing the behavior that we are verifying at the end of the test: > > ```rust > .set_body_raw(sse_message("Hello, world."), "text/event-stream") > ```