Files
codex/prs/bolinfest/PR-2648.md
2025-09-02 15:17:45 -07:00

651 lines
19 KiB
Markdown

# PR #2648: [exec] Clean up apply-patch tests
- URL: https://github.com/openai/codex/pull/2648
- Author: dylan-hurd-oai
- Created: 2025-08-24 22:54:26 UTC
- Updated: 2025-08-25 22:08:08 UTC
- Changes: +238/-275, Files changed: 9, Commits: 3
## Description
## Summary
These tests were getting a bit unwieldy, and they're starting to become load-bearing. Let's clean them up and get them working solidly so we can easily expand this harness with new tests.
## Test Plan
- [x] Tests continue to pass
## Full Diff
```diff
diff --git a/codex-rs/exec/tests/fixtures/apply_patch_freeform_final.txt b/codex-rs/exec/tests/fixtures/apply_patch_freeform_final.txt
new file mode 100644
index 0000000000..b01923e0e5
--- /dev/null
+++ b/codex-rs/exec/tests/fixtures/apply_patch_freeform_final.txt
@@ -0,0 +1,4 @@
+class BaseClass:
+ def method():
+
+ return True
diff --git a/codex-rs/exec/tests/fixtures/sse_apply_patch_add.json b/codex-rs/exec/tests/fixtures/sse_apply_patch_add.json
new file mode 100644
index 0000000000..8d2bf261af
--- /dev/null
+++ b/codex-rs/exec/tests/fixtures/sse_apply_patch_add.json
@@ -0,0 +1,25 @@
+[
+ {
+ "type": "response.output_item.done",
+ "item": {
+ "type": "custom_tool_call",
+ "name": "apply_patch",
+ "input": "*** Begin Patch\n*** Add File: test.md\n+Hello world\n*** End Patch",
+ "call_id": "__ID__"
+ }
+ },
+ {
+ "type": "response.completed",
+ "response": {
+ "id": "__ID__",
+ "usage": {
+ "input_tokens": 0,
+ "input_tokens_details": null,
+ "output_tokens": 0,
+ "output_tokens_details": null,
+ "total_tokens": 0
+ },
+ "output": []
+ }
+ }
+]
diff --git a/codex-rs/exec/tests/fixtures/sse_apply_patch_freeform_add.json b/codex-rs/exec/tests/fixtures/sse_apply_patch_freeform_add.json
new file mode 100644
index 0000000000..ce05e7d482
--- /dev/null
+++ b/codex-rs/exec/tests/fixtures/sse_apply_patch_freeform_add.json
@@ -0,0 +1,25 @@
+[
+ {
+ "type": "response.output_item.done",
+ "item": {
+ "type": "custom_tool_call",
+ "name": "apply_patch",
+ "input": "*** Begin Patch\n*** Add File: app.py\n+class BaseClass:\n+ def method():\n+ return False\n*** End Patch",
+ "call_id": "__ID__"
+ }
+ },
+ {
+ "type": "response.completed",
+ "response": {
+ "id": "__ID__",
+ "usage": {
+ "input_tokens": 0,
+ "input_tokens_details": null,
+ "output_tokens": 0,
+ "output_tokens_details": null,
+ "total_tokens": 0
+ },
+ "output": []
+ }
+ }
+]
diff --git a/codex-rs/exec/tests/fixtures/sse_apply_patch_freeform_update.json b/codex-rs/exec/tests/fixtures/sse_apply_patch_freeform_update.json
new file mode 100644
index 0000000000..8329d9628c
--- /dev/null
+++ b/codex-rs/exec/tests/fixtures/sse_apply_patch_freeform_update.json
@@ -0,0 +1,25 @@
+[
+ {
+ "type": "response.output_item.done",
+ "item": {
+ "type": "custom_tool_call",
+ "name": "apply_patch",
+ "input": "*** Begin Patch\n*** Update File: app.py\n@@ def method():\n- return False\n+\n+ return True\n*** End Patch",
+ "call_id": "__ID__"
+ }
+ },
+ {
+ "type": "response.completed",
+ "response": {
+ "id": "__ID__",
+ "usage": {
+ "input_tokens": 0,
+ "input_tokens_details": null,
+ "output_tokens": 0,
+ "output_tokens_details": null,
+ "total_tokens": 0
+ },
+ "output": []
+ }
+ }
+]
diff --git a/codex-rs/exec/tests/fixtures/sse_apply_patch_update.json b/codex-rs/exec/tests/fixtures/sse_apply_patch_update.json
new file mode 100644
index 0000000000..79689bece3
--- /dev/null
+++ b/codex-rs/exec/tests/fixtures/sse_apply_patch_update.json
@@ -0,0 +1,25 @@
+[
+ {
+ "type": "response.output_item.done",
+ "item": {
+ "type": "function_call",
+ "name": "apply_patch",
+ "arguments": "{\n \"input\": \"*** Begin Patch\\n*** Update File: test.md\\n@@\\n-Hello world\\n+Final text\\n*** End Patch\"\n}",
+ "call_id": "__ID__"
+ }
+ },
+ {
+ "type": "response.completed",
+ "response": {
+ "id": "__ID__",
+ "usage": {
+ "input_tokens": 0,
+ "input_tokens_details": null,
+ "output_tokens": 0,
+ "output_tokens_details": null,
+ "total_tokens": 0
+ },
+ "output": []
+ }
+ }
+]
diff --git a/codex-rs/exec/tests/fixtures/sse_response_completed.json b/codex-rs/exec/tests/fixtures/sse_response_completed.json
new file mode 100644
index 0000000000..1774dc5e84
--- /dev/null
+++ b/codex-rs/exec/tests/fixtures/sse_response_completed.json
@@ -0,0 +1,16 @@
+[
+ {
+ "type": "response.completed",
+ "response": {
+ "id": "__ID__",
+ "usage": {
+ "input_tokens": 0,
+ "input_tokens_details": null,
+ "output_tokens": 0,
+ "output_tokens_details": null,
+ "total_tokens": 0
+ },
+ "output": []
+ }
+ }
+]
diff --git a/codex-rs/exec/tests/suite/apply_patch.rs b/codex-rs/exec/tests/suite/apply_patch.rs
index f05bb73298..5537853b02 100644
--- a/codex-rs/exec/tests/suite/apply_patch.rs
+++ b/codex-rs/exec/tests/suite/apply_patch.rs
@@ -41,148 +41,31 @@ fn test_standalone_exec_cli_can_use_apply_patch() -> anyhow::Result<()> {
}
#[cfg(not(target_os = "windows"))]
-#[tokio::test]
+#[tokio::test(flavor = "multi_thread", worker_threads = 4)]
async fn test_apply_patch_tool() -> anyhow::Result<()> {
- use core_test_support::load_sse_fixture_with_id_from_str;
- use tempfile::TempDir;
- use wiremock::Mock;
- use wiremock::MockServer;
- use wiremock::ResponseTemplate;
- use wiremock::matchers::method;
- use wiremock::matchers::path;
+ use crate::suite::common::run_e2e_exec_test;
+ use codex_core::spawn::CODEX_SANDBOX_NETWORK_DISABLED_ENV_VAR;
- const SSE_TOOL_CALL_ADD: &str = r#"[
- {
- "type": "response.output_item.done",
- "item": {
- "type": "function_call",
- "name": "apply_patch",
- "arguments": "{\n \"input\": \"*** Begin Patch\\n*** Add File: test.md\\n+Hello world\\n*** End Patch\"\n}",
- "call_id": "__ID__"
- }
- },
- {
- "type": "response.completed",
- "response": {
- "id": "__ID__",
- "usage": {
- "input_tokens": 0,
- "input_tokens_details": null,
- "output_tokens": 0,
- "output_tokens_details": null,
- "total_tokens": 0
- },
- "output": []
- }
- }
-]"#;
-
- const SSE_TOOL_CALL_UPDATE: &str = r#"[
- {
- "type": "response.output_item.done",
- "item": {
- "type": "function_call",
- "name": "apply_patch",
- "arguments": "{\n \"input\": \"*** Begin Patch\\n*** Update File: test.md\\n@@\\n-Hello world\\n+Final text\\n*** End Patch\"\n}",
- "call_id": "__ID__"
- }
- },
- {
- "type": "response.completed",
- "response": {
- "id": "__ID__",
- "usage": {
- "input_tokens": 0,
- "input_tokens_details": null,
- "output_tokens": 0,
- "output_tokens_details": null,
- "total_tokens": 0
- },
- "output": []
- }
- }
-]"#;
-
- const SSE_TOOL_CALL_COMPLETED: &str = r#"[
- {
- "type": "response.completed",
- "response": {
- "id": "__ID__",
- "usage": {
- "input_tokens": 0,
- "input_tokens_details": null,
- "output_tokens": 0,
- "output_tokens_details": null,
- "total_tokens": 0
- },
- "output": []
- }
- }
-]"#;
-
- // Start a mock model server
- let server = MockServer::start().await;
-
- // First response: model calls apply_patch to create test.md
- let first = ResponseTemplate::new(200)
- .insert_header("content-type", "text/event-stream")
- .set_body_raw(
- load_sse_fixture_with_id_from_str(SSE_TOOL_CALL_ADD, "call1"),
- "text/event-stream",
- );
-
- Mock::given(method("POST"))
- // .and(path("/v1/responses"))
- .respond_with(first)
- .up_to_n_times(1)
- .mount(&server)
- .await;
-
- // Second response: model calls apply_patch to update test.md
- let second = ResponseTemplate::new(200)
- .insert_header("content-type", "text/event-stream")
- .set_body_raw(
- load_sse_fixture_with_id_from_str(SSE_TOOL_CALL_UPDATE, "call2"),
- "text/event-stream",
+ if std::env::var(CODEX_SANDBOX_NETWORK_DISABLED_ENV_VAR).is_ok() {
+ println!(
+ "Skipping test because it cannot execute when network is disabled in a Codex sandbox."
);
+ return Ok(());
+ }
- Mock::given(method("POST"))
- .and(path("/v1/responses"))
- .respond_with(second)
- .up_to_n_times(1)
- .mount(&server)
- .await;
-
- let final_completed = ResponseTemplate::new(200)
- .insert_header("content-type", "text/event-stream")
- .set_body_raw(
- load_sse_fixture_with_id_from_str(SSE_TOOL_CALL_COMPLETED, "resp3"),
- "text/event-stream",
- );
-
- Mock::given(method("POST"))
- .and(path("/v1/responses"))
- .respond_with(final_completed)
- .expect(1)
- .mount(&server)
- .await;
-
- let tmp_cwd = TempDir::new().unwrap();
- Command::cargo_bin("codex-exec")
- .context("should find binary for codex-exec")?
- .current_dir(tmp_cwd.path())
- .env("CODEX_HOME", tmp_cwd.path())
- .env("OPENAI_API_KEY", "dummy")
- .env("OPENAI_BASE_URL", format!("{}/v1", server.uri()))
- .arg("--skip-git-repo-check")
- .arg("-s")
- .arg("workspace-write")
- .arg("foo")
- .assert()
- .success();
-
- // Verify final file contents
- let final_path = tmp_cwd.path().join("test.md");
+ let tmp_cwd = tempdir().expect("failed to create temp dir");
+ let tmp_path = tmp_cwd.path().to_path_buf();
+ run_e2e_exec_test(
+ tmp_cwd.path(),
+ vec![
+ include_str!("../fixtures/sse_apply_patch_add.json").to_string(),
+ include_str!("../fixtures/sse_apply_patch_update.json").to_string(),
+ include_str!("../fixtures/sse_response_completed.json").to_string(),
+ ],
+ )
+ .await;
+
+ let final_path = tmp_path.join("test.md");
let contents = std::fs::read_to_string(&final_path)
.unwrap_or_else(|e| panic!("failed reading {}: {e}", final_path.display()));
assert_eq!(contents, "Final text\n");
@@ -190,150 +73,36 @@ async fn test_apply_patch_tool() -> anyhow::Result<()> {
}
#[cfg(not(target_os = "windows"))]
-#[tokio::test]
+#[tokio::test(flavor = "multi_thread", worker_threads = 4)]
async fn test_apply_patch_freeform_tool() -> anyhow::Result<()> {
- use core_test_support::load_sse_fixture_with_id_from_str;
- use tempfile::TempDir;
- use wiremock::Mock;
- use wiremock::MockServer;
- use wiremock::ResponseTemplate;
- use wiremock::matchers::method;
- use wiremock::matchers::path;
+ use crate::suite::common::run_e2e_exec_test;
+ use codex_core::spawn::CODEX_SANDBOX_NETWORK_DISABLED_ENV_VAR;
- const SSE_TOOL_CALL_ADD: &str = r#"[
- {
- "type": "response.output_item.done",
- "item": {
- "type": "custom_tool_call",
- "name": "apply_patch",
- "input": "*** Begin Patch\n*** Add File: test.md\n+Hello world\n*** End Patch",
- "call_id": "__ID__"
- }
- },
- {
- "type": "response.completed",
- "response": {
- "id": "__ID__",
- "usage": {
- "input_tokens": 0,
- "input_tokens_details": null,
- "output_tokens": 0,
- "output_tokens_details": null,
- "total_tokens": 0
- },
- "output": []
- }
- }
-]"#;
-
- const SSE_TOOL_CALL_UPDATE: &str = r#"[
- {
- "type": "response.output_item.done",
- "item": {
- "type": "custom_tool_call",
- "name": "apply_patch",
- "input": "*** Begin Patch\n*** Update File: test.md\n@@\n-Hello world\n+Final text\n*** End Patch",
- "call_id": "__ID__"
- }
- },
- {
- "type": "response.completed",
- "response": {
- "id": "__ID__",
- "usage": {
- "input_tokens": 0,
- "input_tokens_details": null,
- "output_tokens": 0,
- "output_tokens_details": null,
- "total_tokens": 0
- },
- "output": []
- }
- }
-]"#;
-
- const SSE_TOOL_CALL_COMPLETED: &str = r#"[
- {
- "type": "response.completed",
- "response": {
- "id": "__ID__",
- "usage": {
- "input_tokens": 0,
- "input_tokens_details": null,
- "output_tokens": 0,
- "output_tokens_details": null,
- "total_tokens": 0
- },
- "output": []
- }
- }
-]"#;
-
- // Start a mock model server
- let server = MockServer::start().await;
-
- // First response: model calls apply_patch to create test.md
- let first = ResponseTemplate::new(200)
- .insert_header("content-type", "text/event-stream")
- .set_body_raw(
- load_sse_fixture_with_id_from_str(SSE_TOOL_CALL_ADD, "call1"),
- "text/event-stream",
- );
-
- Mock::given(method("POST"))
- // .and(path("/v1/responses"))
- .respond_with(first)
- .up_to_n_times(1)
- .mount(&server)
- .await;
-
- // Second response: model calls apply_patch to update test.md
- let second = ResponseTemplate::new(200)
- .insert_header("content-type", "text/event-stream")
- .set_body_raw(
- load_sse_fixture_with_id_from_str(SSE_TOOL_CALL_UPDATE, "call2"),
- "text/event-stream",
- );
-
- Mock::given(method("POST"))
- .and(path("/v1/responses"))
- .respond_with(second)
- .up_to_n_times(1)
- .mount(&server)
- .await;
-
- let final_completed = ResponseTemplate::new(200)
- .insert_header("content-type", "text/event-stream")
- .set_body_raw(
- load_sse_fixture_with_id_from_str(SSE_TOOL_CALL_COMPLETED, "resp3"),
- "text/event-stream",
+ if std::env::var(CODEX_SANDBOX_NETWORK_DISABLED_ENV_VAR).is_ok() {
+ println!(
+ "Skipping test because it cannot execute when network is disabled in a Codex sandbox."
);
+ return Ok(());
+ }
- Mock::given(method("POST"))
- // .and(path("/v1/responses"))
- .respond_with(final_completed)
- .expect(1)
- .mount(&server)
- .await;
-
- let tmp_cwd = TempDir::new().unwrap();
- Command::cargo_bin("codex-exec")
- .context("should find binary for codex-exec")?
- .current_dir(tmp_cwd.path())
- .env("CODEX_HOME", tmp_cwd.path())
- .env("OPENAI_API_KEY", "dummy")
- .env("OPENAI_BASE_URL", format!("{}/v1", server.uri()))
- .arg("--skip-git-repo-check")
- .arg("-s")
- .arg("workspace-write")
- .arg("foo")
- .assert()
- .success();
+ let tmp_cwd = tempdir().expect("failed to create temp dir");
+ run_e2e_exec_test(
+ tmp_cwd.path(),
+ vec![
+ include_str!("../fixtures/sse_apply_patch_freeform_add.json").to_string(),
+ include_str!("../fixtures/sse_apply_patch_freeform_update.json").to_string(),
+ include_str!("../fixtures/sse_response_completed.json").to_string(),
+ ],
+ )
+ .await;
// Verify final file contents
- let final_path = tmp_cwd.path().join("test.md");
+ let final_path = tmp_cwd.path().join("app.py");
let contents = std::fs::read_to_string(&final_path)
.unwrap_or_else(|e| panic!("failed reading {}: {e}", final_path.display()));
- assert_eq!(contents, "Final text\n");
+ assert_eq!(
+ contents,
+ include_str!("../fixtures/apply_patch_freeform_final.txt")
+ );
Ok(())
}
diff --git a/codex-rs/exec/tests/suite/common.rs b/codex-rs/exec/tests/suite/common.rs
new file mode 100644
index 0000000000..49747dca05
--- /dev/null
+++ b/codex-rs/exec/tests/suite/common.rs
@@ -0,0 +1,73 @@
+// this file is only used for e2e tests which are currently disabled on windows
+#![cfg(not(target_os = "windows"))]
+#![allow(clippy::expect_used)]
+
+use anyhow::Context;
+use assert_cmd::prelude::*;
+use core_test_support::load_sse_fixture_with_id_from_str;
+use std::path::Path;
+use std::process::Command;
+use std::sync::atomic::AtomicUsize;
+use std::sync::atomic::Ordering;
+use wiremock::Mock;
+use wiremock::MockServer;
+use wiremock::matchers::method;
+use wiremock::matchers::path;
+
+use wiremock::Respond;
+
+struct SeqResponder {
+ num_calls: AtomicUsize,
+ responses: Vec<String>,
+}
+
+impl Respond for SeqResponder {
+ fn respond(&self, _: &wiremock::Request) -> wiremock::ResponseTemplate {
+ let call_num = self.num_calls.fetch_add(1, Ordering::SeqCst);
+ match self.responses.get(call_num) {
+ Some(body) => wiremock::ResponseTemplate::new(200)
+ .insert_header("content-type", "text/event-stream")
+ .set_body_raw(
+ load_sse_fixture_with_id_from_str(body, &format!("request_{}", call_num)),
+ "text/event-stream",
+ ),
+ None => panic!("no response for {call_num}"),
+ }
+ }
+}
+
+/// Helper function to run an E2E test of a codex-exec call. Starts a wiremock
+/// server, and returns the response_streams in order for each api call. Runs
+/// the codex-exec command with the wiremock server as the model server.
+pub(crate) async fn run_e2e_exec_test(cwd: &Path, response_streams: Vec<String>) {
+ let server = MockServer::start().await;
+
+ let num_calls = response_streams.len();
+ let seq_responder = SeqResponder {
+ num_calls: AtomicUsize::new(0),
+ responses: response_streams,
+ };
+
+ Mock::given(method("POST"))
+ .and(path("/v1/responses"))
+ .respond_with(seq_responder)
+ .expect(num_calls as u64)
+ .mount(&server)
+ .await;
+
+ let cwd = cwd.to_path_buf();
+ let uri = server.uri();
+ Command::cargo_bin("codex-exec")
+ .context("should find binary for codex-exec")
+ .expect("should find binary for codex-exec")
+ .current_dir(cwd.clone())
+ .env("CODEX_HOME", cwd.clone())
+ .env("OPENAI_API_KEY", "dummy")
+ .env("OPENAI_BASE_URL", format!("{}/v1", uri))
+ .arg("--skip-git-repo-check")
+ .arg("-s")
+ .arg("danger-full-access")
+ .arg("foo")
+ .assert()
+ .success();
+}
diff --git a/codex-rs/exec/tests/suite/mod.rs b/codex-rs/exec/tests/suite/mod.rs
index 8a83474ef9..75b19ee1b2 100644
--- a/codex-rs/exec/tests/suite/mod.rs
+++ b/codex-rs/exec/tests/suite/mod.rs
@@ -1,3 +1,4 @@
// Aggregates all former standalone integration tests as modules.
mod apply_patch;
+mod common;
mod sandbox;
```
## Review Comments
### codex-rs/exec/tests/fixtures/sse_apply_patch_add.json
- Created: 2025-08-25 16:43:54 UTC | Link: https://github.com/openai/codex/pull/2648#discussion_r2298595162
```diff
@@ -0,0 +1,25 @@
+[
+ {
+ "type": "response.output_item.done",
+ "item": {
+ "type": "custom_tool_call",
+ "name": "apply_patch",
+ "input": "*** Begin Patch\n*** Add File: test.md\n+Hello world\n*** End Patch",
+ "call_id": "__ID__"
+ }
+ },
+ {
+ "type": "response.completed",
+ "response": {
+ "id": "__ID__",
+ "usage": {
+ "input_tokens": 0,
+ "input_tokens_details": null,
+ "output_tokens": 0,
+ "output_tokens_details": null,
+ "total_tokens": 0
+ },
+ "output": []
+ }
+ }
+]
```
> Will it break the test if these files have newlines at the end (i.e., is the trailing newline significant to how the request is handled)?
>
> It would be nice to have them here if they're safe.