20 KiB
PR #1545: Add CLI integration tests
- URL: https://github.com/openai/codex/pull/1545
- Author: aibrahim-oai
- Created: 2025-07-11 21:03:16 UTC
- Updated: 2025-07-17 16:38:35 UTC
- Changes: +236/-0, Files changed: 3, Commits: 12
Description
Summary
- add new integration tests for the Rust CLI
- test a basic single-turn response
- validate shell tool invocation flow
- update Cargo.lock for test dependencies
Testing
- `cargo fmt --all`
- `cargo clippy -p codex-cli --tests --all-features -- -D warnings`
- `cargo test -p codex-cli --test integration -- --nocapture`
https://chatgpt.com/codex/tasks/task_i_68717125ff6083219bf892e0bdf14427
Full Diff
diff --git a/codex-rs/Cargo.lock b/codex-rs/Cargo.lock
index e59dbfa255..2b16b82e48 100644
--- a/codex-rs/Cargo.lock
+++ b/codex-rs/Cargo.lock
@@ -617,6 +617,7 @@ name = "codex-cli"
version = "0.0.0"
dependencies = [
"anyhow",
+ "assert_cmd",
"clap",
"clap_complete",
"codex-chatgpt",
@@ -627,10 +628,14 @@ dependencies = [
"codex-login",
"codex-mcp-server",
"codex-tui",
+ "indoc",
+ "predicates",
"serde_json",
+ "tempfile",
"tokio",
"tracing",
"tracing-subscriber",
+ "wiremock",
]
[[package]]
diff --git a/codex-rs/cli/Cargo.toml b/codex-rs/cli/Cargo.toml
index 943788157b..9932e89caa 100644
--- a/codex-rs/cli/Cargo.toml
+++ b/codex-rs/cli/Cargo.toml
@@ -36,3 +36,11 @@ tokio = { version = "1", features = [
] }
tracing = "0.1.41"
tracing-subscriber = "0.3.19"
+
+[dev-dependencies]
+assert_cmd = "2"
+predicates = "3"
+tempfile = "3"
+wiremock = "0.6"
+tokio = { version = "1", features = ["macros", "rt-multi-thread"] }
+indoc = "2"
diff --git a/codex-rs/cli/tests/integration.rs b/codex-rs/cli/tests/integration.rs
new file mode 100644
index 0000000000..6054dbe3d3
--- /dev/null
+++ b/codex-rs/cli/tests/integration.rs
@@ -0,0 +1,223 @@
+#![allow(clippy::unwrap_used)]
+
+//! End-to-end integration tests for the `codex` CLI.
+//!
+//! These spin up a local [`wiremock`][] server to stand in for the model
+//! provider's Responses API and then run the real compiled `codex` binary
+//! against it. The goal is to verify the high-level request/response flow
+//! rather than the details of the individual async functions.
+//!
+//! [`wiremock`]: https://docs.rs/wiremock
+
+use codex_core::exec::CODEX_SANDBOX_NETWORK_DISABLED_ENV_VAR;
+use predicates::prelude::*;
+use std::fs;
+use std::path::Path;
+use tempfile::TempDir;
+use wiremock::Mock;
+use wiremock::MockServer;
+use wiremock::ResponseTemplate;
+use wiremock::matchers::method;
+use wiremock::matchers::path;
+
+// ----- tests -----
+
+/// Sends a single simple prompt and verifies that the streamed response is
+/// surfaced to the user. This exercises the most common "ask a question, get a
+/// textual answer" flow.
+#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
+async fn full_conversation_turn_integration() {
+ if std::env::var(CODEX_SANDBOX_NETWORK_DISABLED_ENV_VAR).is_ok() {
+ println!("Skipping test because network is disabled");
+ return;
+ }
+
+ let server = MockServer::start().await;
+ Mock::given(method("POST"))
+ .and(path("/v1/responses"))
+ .respond_with(
+ ResponseTemplate::new(200)
+ .insert_header("content-type", "text/event-stream")
+ .set_body_raw(sse_message("Hello, world."), "text/event-stream"),
+ )
+ .expect(1)
+ .mount(&server)
+ .await;
+
+ // Disable retries — the mock server will fail hard if we make an unexpected
+ // request, so retries only slow the test down.
+ unsafe {
+ std::env::set_var("OPENAI_REQUEST_MAX_RETRIES", "0");
+ std::env::set_var("OPENAI_STREAM_MAX_RETRIES", "0");
+ }
+
+ let codex_home = TempDir::new().unwrap();
+ let sandbox = TempDir::new().unwrap();
+ write_config(codex_home.path(), &server);
+
+ // Capture the agent's final message in a file so we can assert on it precisely.
+ let last_message_file = sandbox.path().join("last_message.txt");
+
+ let mut cmd = assert_cmd::Command::cargo_bin("codex").unwrap();
+ cmd.env("CODEX_HOME", codex_home.path())
+ .current_dir(sandbox.path())
+ .arg("exec")
+ .arg("--skip-git-repo-check")
+ .arg("--output-last-message")
+ .arg(&last_message_file)
+ .arg("Hello");
+
+ cmd.assert()
+ .success()
+ .stdout(predicate::str::contains("Hello, world."));
+
+ // Assert on the captured last message file (more robust than stdout formatting).
+ let last = fs::read_to_string(&last_message_file).unwrap();
+ let expected = "Hello, world.";
+ assert_eq!(last.trim(), expected);
+}
+
+/// Simulates a tool invocation (`shell`) followed by a second assistant message
+/// once the tool call completes.
+#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
+async fn tool_invocation_flow() {
+ if std::env::var(CODEX_SANDBOX_NETWORK_DISABLED_ENV_VAR).is_ok() {
+ println!("Skipping test because network is disabled");
+ return;
+ }
+
+ let server = MockServer::start().await;
+
+ // The first request returns a function-call item; the second returns the
+ // final assistant message. Use an atomic counter to serve them in order.
+ struct SeqResponder {
+ count: std::sync::atomic::AtomicUsize,
+ }
+ impl wiremock::Respond for SeqResponder {
+ fn respond(&self, _: &wiremock::Request) -> ResponseTemplate {
+ use std::sync::atomic::Ordering;
+ match self.count.fetch_add(1, Ordering::SeqCst) {
+ 0 => ResponseTemplate::new(200)
+ .insert_header("content-type", "text/event-stream")
+ .set_body_raw(sse_function_call(), "text/event-stream"),
+ _ => ResponseTemplate::new(200)
+ .insert_header("content-type", "text/event-stream")
+ .set_body_raw(sse_final_after_call(), "text/event-stream"),
+ }
+ }
+ }
+
+ Mock::given(method("POST"))
+ .and(path("/v1/responses"))
+ .respond_with(SeqResponder {
+ count: std::sync::atomic::AtomicUsize::new(0),
+ })
+ .expect(2)
+ .mount(&server)
+ .await;
+
+ unsafe {
+ std::env::set_var("OPENAI_REQUEST_MAX_RETRIES", "0");
+ std::env::set_var("OPENAI_STREAM_MAX_RETRIES", "0");
+ }
+
+ let codex_home = TempDir::new().unwrap();
+ let sandbox = TempDir::new().unwrap();
+ write_config(codex_home.path(), &server);
+
+ // Capture final assistant message after tool invocation.
+ let last_message_file = sandbox.path().join("last_message.txt");
+
+ let mut cmd = assert_cmd::Command::cargo_bin("codex").unwrap();
+ cmd.env("CODEX_HOME", codex_home.path())
+ .current_dir(sandbox.path())
+ .arg("exec")
+ .arg("--skip-git-repo-check")
+ .arg("--output-last-message")
+ .arg(&last_message_file)
+ .arg("Run shell");
+
+ cmd.assert()
+ .success()
+ .stdout(predicate::str::contains("exec echo hi"))
+ .stdout(predicate::str::contains("hi"));
+
+ // Assert that the final assistant message (second response) was 'done'.
+ let last = fs::read_to_string(&last_message_file).unwrap();
+ let expected = "done";
+ assert_eq!(last.trim(), expected);
+}
+
+/// Write a minimal `config.toml` pointing the CLI at the mock server.
+fn write_config(codex_home: &Path, server: &MockServer) {
+ fs::write(
+ codex_home.join("config.toml"),
+ format!(
+ r#"
+model_provider = "mock"
+model = "test-model"
+
+[model_providers.mock]
+name = "mock"
+base_url = "{}/v1"
+env_key = "PATH"
+wire_api = "responses"
+"#,
+ server.uri()
+ ),
+ )
+ .unwrap();
+}
+
+/// Small helper to generate an SSE stream with a single assistant message.
+fn sse_message(text: &str) -> String {
+ const TEMPLATE: &str = r#"event: response.output_item.done
+data: {"type":"response.output_item.done","item":{"type":"message","role":"assistant","content":[{"type":"output_text","text":"TEXT_PLACEHOLDER"}]}}
+
+event: response.completed
+data: {"type":"response.completed","response":{"id":"resp1","output":[]}}
+
+
+"#;
+
+ TEMPLATE.replace("TEXT_PLACEHOLDER", text)
+}
+
+/// Helper to craft an SSE stream that returns a `function_call`.
+fn sse_function_call() -> String {
+ let call = serde_json::json!({
+ "type": "response.output_item.done",
+ "item": {
+ "type": "function_call",
+ "name": "shell",
+ "arguments": "{\"command\":[\"echo\",\"hi\"]}",
+ "call_id": "call1"
+ }
+ });
+ let completed = serde_json::json!({
+ "type": "response.completed",
+ "response": {"id": "resp1", "output": []}
+ });
+
+ format!(
+ "event: response.output_item.done\ndata: {call}\n\n\
+event: response.completed\ndata: {completed}\n\n\n"
+ )
+}
+
+/// SSE stream for the assistant's final message after the tool call returns.
+fn sse_final_after_call() -> String {
+ let msg = serde_json::json!({
+ "type": "response.output_item.done",
+ "item": {"type": "message", "role": "assistant", "content": [{"type": "output_text", "text": "done"}]}
+ });
+ let completed = serde_json::json!({
+ "type": "response.completed",
+ "response": {"id": "resp2", "output": []}
+ });
+
+ format!(
+ "event: response.output_item.done\ndata: {msg}\n\n\
+event: response.completed\ndata: {completed}\n\n\n"
+ )
+}
Review Comments
codex-rs/cli/tests/integration.rs
- Created: 2025-07-12 17:41:32 UTC | Link: https://github.com/openai/codex/pull/1545#discussion_r2202841506
@@ -0,0 +1,171 @@
+#![allow(clippy::unwrap_used)]
+
+use codex_core::exec::CODEX_SANDBOX_NETWORK_DISABLED_ENV_VAR;
+use predicates::prelude::*;
+use std::fs;
+use std::path::Path;
+use tempfile::TempDir;
+use wiremock::Mock;
+use wiremock::MockServer;
+use wiremock::ResponseTemplate;
+use wiremock::matchers::method;
+use wiremock::matchers::path;
+
+fn write_config(dir: &Path, server: &MockServer) {
Similar to a comment I made on another PR, please list all of these helper functions below the tests. The tests are the most important thing in this file.
- Created: 2025-07-12 17:43:25 UTC | Link: https://github.com/openai/codex/pull/1545#discussion_r2202842748
@@ -0,0 +1,171 @@
+#![allow(clippy::unwrap_used)]
+
+use codex_core::exec::CODEX_SANDBOX_NETWORK_DISABLED_ENV_VAR;
+use predicates::prelude::*;
+use std::fs;
+use std::path::Path;
+use tempfile::TempDir;
+use wiremock::Mock;
+use wiremock::MockServer;
+use wiremock::ResponseTemplate;
+use wiremock::matchers::method;
+use wiremock::matchers::path;
+
+fn write_config(dir: &Path, server: &MockServer) {
+ fs::write(
+ dir.join("config.toml"),
+ format!(
+ r#"model_provider = "mock"
+model = "test-model"
+[model_providers.mock]
+name = "mock"
+base_url = "{}/v1"
+env_key = "PATH"
+wire_api = "responses"
+"#,
Since the leading newline at the start of the content doesn't hurt anything, I would do this for readability:
```rust
            r#"
model_provider = "mock"
model = "test-model"
[model_providers.mock]
name = "mock"
base_url = "{}/v1"
env_key = "PATH"
wire_api = "responses"
"#,
```
You can also consider https://crates.io/crates/indoc if you feel strongly.
- Created: 2025-07-12 17:55:01 UTC | Link: https://github.com/openai/codex/pull/1545#discussion_r2202846863
@@ -0,0 +1,171 @@
+#![allow(clippy::unwrap_used)]
+
+use codex_core::exec::CODEX_SANDBOX_NETWORK_DISABLED_ENV_VAR;
+use predicates::prelude::*;
+use std::fs;
+use std::path::Path;
+use tempfile::TempDir;
+use wiremock::Mock;
+use wiremock::MockServer;
+use wiremock::ResponseTemplate;
+use wiremock::matchers::method;
+use wiremock::matchers::path;
+
+fn write_config(dir: &Path, server: &MockServer) {
+ fs::write(
+ dir.join("config.toml"),
+ format!(
+ r#"model_provider = "mock"
+model = "test-model"
+[model_providers.mock]
+name = "mock"
+base_url = "{}/v1"
+env_key = "PATH"
+wire_api = "responses"
+"#,
+ server.uri()
+ ),
+ )
+ .unwrap();
+}
+
+fn sse_message(text: &str) -> String {
+ format!(
+ "event: response.output_item.done\n\
+data: {{\"type\":\"response.output_item.done\",\"item\":{{\"type\":\"message\",\"role\":\"assistant\",\"content\":[{{\"type\":\"output_text\",\"text\":\"{text}\"}}]}}}}\n\n\
+event: response.completed\n\
+data: {{\"type\":\"response.completed\",\"response\":{{\"id\":\"resp1\",\"output\":[]}}}}\n\n\n"
+ )
In this case, the escaping of `{` makes this so hard to read that I would consider using `replace()`:
```rust
let template = r#"event: response.output_item.done
data: {"type":"response.output_item.done","item":{"type":"message","role":"assistant","content":[{"type":"output_text","text":"TEXT_PLACEHOLDER"}]}}

event: response.completed
data: {"type":"response.completed","response":{"id":"resp1","output":[]}}


"#;
template.replace("TEXT_PLACEHOLDER", text);
```
- Created: 2025-07-12 17:59:03 UTC | Link: https://github.com/openai/codex/pull/1545#discussion_r2202848480
@@ -0,0 +1,171 @@
+#![allow(clippy::unwrap_used)]
+
+use codex_core::exec::CODEX_SANDBOX_NETWORK_DISABLED_ENV_VAR;
+use predicates::prelude::*;
+use std::fs;
+use std::path::Path;
+use tempfile::TempDir;
+use wiremock::Mock;
+use wiremock::MockServer;
+use wiremock::ResponseTemplate;
+use wiremock::matchers::method;
+use wiremock::matchers::path;
+
+fn write_config(dir: &Path, server: &MockServer) {
Also, I would name the variable `codex_home` rather than `dir`.
- Created: 2025-07-12 18:04:37 UTC | Link: https://github.com/openai/codex/pull/1545#discussion_r2202854166
@@ -0,0 +1,171 @@
+#![allow(clippy::unwrap_used)]
+
+use codex_core::exec::CODEX_SANDBOX_NETWORK_DISABLED_ENV_VAR;
+use predicates::prelude::*;
+use std::fs;
+use std::path::Path;
+use tempfile::TempDir;
+use wiremock::Mock;
+use wiremock::MockServer;
+use wiremock::ResponseTemplate;
+use wiremock::matchers::method;
+use wiremock::matchers::path;
+
+fn write_config(dir: &Path, server: &MockServer) {
+ fs::write(
+ dir.join("config.toml"),
+ format!(
+ r#"model_provider = "mock"
+model = "test-model"
+[model_providers.mock]
+name = "mock"
+base_url = "{}/v1"
+env_key = "PATH"
+wire_api = "responses"
+"#,
+ server.uri()
+ ),
+ )
+ .unwrap();
+}
+
+fn sse_message(text: &str) -> String {
+ format!(
+ "event: response.output_item.done\n\
+data: {{\"type\":\"response.output_item.done\",\"item\":{{\"type\":\"message\",\"role\":\"assistant\",\"content\":[{{\"type\":\"output_text\",\"text\":\"{text}\"}}]}}}}\n\n\
+event: response.completed\n\
+data: {{\"type\":\"response.completed\",\"response\":{{\"id\":\"resp1\",\"output\":[]}}}}\n\n\n"
+ )
+}
+
+fn sse_function_call() -> String {
+ let call = serde_json::json!({
+ "type": "response.output_item.done",
+ "item": {
+ "type": "function_call",
+ "name": "shell",
+ "arguments": "{\"command\":[\"echo\",\"hi\"]}",
+ "call_id": "call1"
+ }
+ });
+ let completed = serde_json::json!({
+ "type": "response.completed",
+ "response": {"id": "resp1", "output": []}
+ });
+ format!(
+ "event: response.output_item.done\ndata: {call}\n\n\
+event: response.completed\ndata: {completed}\n\n\n"
+ )
+}
+
+fn sse_final_after_call() -> String {
+ let msg = serde_json::json!({
+ "type": "response.output_item.done",
+ "item": {"type": "message", "role": "assistant", "content": [{"type": "output_text", "text": "done"}]}
+ });
+ let completed = serde_json::json!({
+ "type": "response.completed",
+ "response": {"id": "resp2", "output": []}
+ });
+ format!(
+ "event: response.output_item.done\ndata: {msg}\n\n\
+event: response.completed\ndata: {completed}\n\n\n"
+ )
+}
+
+#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
+async fn full_conversation_turn_integration() {
+ if std::env::var(CODEX_SANDBOX_NETWORK_DISABLED_ENV_VAR).is_ok() {
+ println!("Skipping test because network is disabled");
+ return;
+ }
+
+ let server = MockServer::start().await;
+ let resp = ResponseTemplate::new(200)
+ .insert_header("content-type", "text/event-stream")
+ .set_body_raw(sse_message("Hello, world."), "text/event-stream");
+ Mock::given(method("POST"))
+ .and(path("/v1/responses"))
+ .respond_with(resp)
+ .expect(1)
+ .mount(&server)
+ .await;
+
+ unsafe {
+ std::env::set_var("OPENAI_REQUEST_MAX_RETRIES", "0");
+ std::env::set_var("OPENAI_STREAM_MAX_RETRIES", "0");
+ }
+
+ let home = TempDir::new().unwrap();
+ let sandbox = TempDir::new().unwrap();
+ write_config(home.path(), &server);
+
+ let mut cmd = assert_cmd::Command::cargo_bin("codex").unwrap();
+ cmd.env("CODEX_HOME", home.path());
+ cmd.current_dir(sandbox.path());
+ cmd.arg("exec").arg("--skip-git-repo-check").arg("Hello");
I would also run this with `--output-last-message FILE`, where `FILE` is some file in `sandbox`. Then you can do a more precise assertion on the final message since you can do it against `FILE` instead of stdout. (Same for the other test.)
- Created: 2025-07-12 18:06:39 UTC | Link: https://github.com/openai/codex/pull/1545#discussion_r2202854547
@@ -0,0 +1,171 @@
+#![allow(clippy::unwrap_used)]
+
+use codex_core::exec::CODEX_SANDBOX_NETWORK_DISABLED_ENV_VAR;
+use predicates::prelude::*;
+use std::fs;
+use std::path::Path;
+use tempfile::TempDir;
+use wiremock::Mock;
+use wiremock::MockServer;
+use wiremock::ResponseTemplate;
+use wiremock::matchers::method;
+use wiremock::matchers::path;
+
+fn write_config(dir: &Path, server: &MockServer) {
+ fs::write(
+ dir.join("config.toml"),
+ format!(
+ r#"model_provider = "mock"
+model = "test-model"
+[model_providers.mock]
+name = "mock"
+base_url = "{}/v1"
+env_key = "PATH"
+wire_api = "responses"
+"#,
+ server.uri()
+ ),
+ )
+ .unwrap();
+}
+
+fn sse_message(text: &str) -> String {
+ format!(
+ "event: response.output_item.done\n\
+data: {{\"type\":\"response.output_item.done\",\"item\":{{\"type\":\"message\",\"role\":\"assistant\",\"content\":[{{\"type\":\"output_text\",\"text\":\"{text}\"}}]}}}}\n\n\
+event: response.completed\n\
+data: {{\"type\":\"response.completed\",\"response\":{{\"id\":\"resp1\",\"output\":[]}}}}\n\n\n"
+ )
+}
+
+fn sse_function_call() -> String {
+ let call = serde_json::json!({
+ "type": "response.output_item.done",
+ "item": {
+ "type": "function_call",
+ "name": "shell",
+ "arguments": "{\"command\":[\"echo\",\"hi\"]}",
+ "call_id": "call1"
+ }
+ });
+ let completed = serde_json::json!({
+ "type": "response.completed",
+ "response": {"id": "resp1", "output": []}
+ });
+ format!(
+ "event: response.output_item.done\ndata: {call}\n\n\
+event: response.completed\ndata: {completed}\n\n\n"
+ )
+}
+
+fn sse_final_after_call() -> String {
+ let msg = serde_json::json!({
+ "type": "response.output_item.done",
+ "item": {"type": "message", "role": "assistant", "content": [{"type": "output_text", "text": "done"}]}
+ });
+ let completed = serde_json::json!({
+ "type": "response.completed",
+ "response": {"id": "resp2", "output": []}
+ });
+ format!(
+ "event: response.output_item.done\ndata: {msg}\n\n\
+event: response.completed\ndata: {completed}\n\n\n"
+ )
+}
+
+#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
Could you add docstrings for this test and the other test? Admittedly, there is a lot of code required just to set up these tests, so it's not 100% obvious what is being tested. That is, this line seems to be the key bit that is producing the behavior that we are verifying at the end of the test:
`.set_body_raw(sse_message("Hello, world."), "text/event-stream")`