Files
codex/prs/bolinfest/PR-2648.md
2025-09-02 15:17:45 -07:00

19 KiB

PR #2648: [exec] Clean up apply-patch tests

Description

Summary

These tests were getting a bit unwieldy, and they're starting to become load-bearing. Let's clean them up, and get them working solidly so we can easily expand this harness with new tests.

Test Plan

  • Tests continue to pass

Full Diff

diff --git a/codex-rs/exec/tests/fixtures/apply_patch_freeform_final.txt b/codex-rs/exec/tests/fixtures/apply_patch_freeform_final.txt
new file mode 100644
index 0000000000..b01923e0e5
--- /dev/null
+++ b/codex-rs/exec/tests/fixtures/apply_patch_freeform_final.txt
@@ -0,0 +1,4 @@
+class BaseClass:
+  def method():
+
+    return True
diff --git a/codex-rs/exec/tests/fixtures/sse_apply_patch_add.json b/codex-rs/exec/tests/fixtures/sse_apply_patch_add.json
new file mode 100644
index 0000000000..8d2bf261af
--- /dev/null
+++ b/codex-rs/exec/tests/fixtures/sse_apply_patch_add.json
@@ -0,0 +1,25 @@
+[
+  {
+    "type": "response.output_item.done",
+    "item": {
+      "type": "custom_tool_call",
+      "name": "apply_patch",
+      "input": "*** Begin Patch\n*** Add File: test.md\n+Hello world\n*** End Patch",
+      "call_id": "__ID__"
+    }
+  },
+  {
+    "type": "response.completed",
+    "response": {
+      "id": "__ID__",
+      "usage": {
+        "input_tokens": 0,
+        "input_tokens_details": null,
+        "output_tokens": 0,
+        "output_tokens_details": null,
+        "total_tokens": 0
+      },
+      "output": []
+    }
+  }
+]
diff --git a/codex-rs/exec/tests/fixtures/sse_apply_patch_freeform_add.json b/codex-rs/exec/tests/fixtures/sse_apply_patch_freeform_add.json
new file mode 100644
index 0000000000..ce05e7d482
--- /dev/null
+++ b/codex-rs/exec/tests/fixtures/sse_apply_patch_freeform_add.json
@@ -0,0 +1,25 @@
+[
+  {
+    "type": "response.output_item.done",
+    "item": {
+      "type": "custom_tool_call",
+      "name": "apply_patch",
+      "input": "*** Begin Patch\n*** Add File: app.py\n+class BaseClass:\n+  def method():\n+    return False\n*** End Patch",
+      "call_id": "__ID__"
+    }
+  },
+  {
+    "type": "response.completed",
+    "response": {
+      "id": "__ID__",
+      "usage": {
+        "input_tokens": 0,
+        "input_tokens_details": null,
+        "output_tokens": 0,
+        "output_tokens_details": null,
+        "total_tokens": 0
+      },
+      "output": []
+    }
+  }
+]
diff --git a/codex-rs/exec/tests/fixtures/sse_apply_patch_freeform_update.json b/codex-rs/exec/tests/fixtures/sse_apply_patch_freeform_update.json
new file mode 100644
index 0000000000..8329d9628c
--- /dev/null
+++ b/codex-rs/exec/tests/fixtures/sse_apply_patch_freeform_update.json
@@ -0,0 +1,25 @@
+[
+  {
+    "type": "response.output_item.done",
+    "item": {
+      "type": "custom_tool_call",
+      "name": "apply_patch",
+      "input": "*** Begin Patch\n*** Update File: app.py\n@@  def method():\n-    return False\n+\n+    return True\n*** End Patch",
+      "call_id": "__ID__"
+    }
+  },
+  {
+    "type": "response.completed",
+    "response": {
+      "id": "__ID__",
+      "usage": {
+        "input_tokens": 0,
+        "input_tokens_details": null,
+        "output_tokens": 0,
+        "output_tokens_details": null,
+        "total_tokens": 0
+      },
+      "output": []
+    }
+  }
+]
diff --git a/codex-rs/exec/tests/fixtures/sse_apply_patch_update.json b/codex-rs/exec/tests/fixtures/sse_apply_patch_update.json
new file mode 100644
index 0000000000..79689bece3
--- /dev/null
+++ b/codex-rs/exec/tests/fixtures/sse_apply_patch_update.json
@@ -0,0 +1,25 @@
+[
+  {
+    "type": "response.output_item.done",
+    "item": {
+      "type": "function_call",
+      "name": "apply_patch",
+      "arguments": "{\n  \"input\": \"*** Begin Patch\\n*** Update File: test.md\\n@@\\n-Hello world\\n+Final text\\n*** End Patch\"\n}",
+      "call_id": "__ID__"
+    }
+  },
+  {
+    "type": "response.completed",
+    "response": {
+      "id": "__ID__",
+      "usage": {
+        "input_tokens": 0,
+        "input_tokens_details": null,
+        "output_tokens": 0,
+        "output_tokens_details": null,
+        "total_tokens": 0
+      },
+      "output": []
+    }
+  }
+]
diff --git a/codex-rs/exec/tests/fixtures/sse_response_completed.json b/codex-rs/exec/tests/fixtures/sse_response_completed.json
new file mode 100644
index 0000000000..1774dc5e84
--- /dev/null
+++ b/codex-rs/exec/tests/fixtures/sse_response_completed.json
@@ -0,0 +1,16 @@
+[
+  {
+    "type": "response.completed",
+    "response": {
+      "id": "__ID__",
+      "usage": {
+        "input_tokens": 0,
+        "input_tokens_details": null,
+        "output_tokens": 0,
+        "output_tokens_details": null,
+        "total_tokens": 0
+      },
+      "output": []
+    }
+  }
+]
diff --git a/codex-rs/exec/tests/suite/apply_patch.rs b/codex-rs/exec/tests/suite/apply_patch.rs
index f05bb73298..5537853b02 100644
--- a/codex-rs/exec/tests/suite/apply_patch.rs
+++ b/codex-rs/exec/tests/suite/apply_patch.rs
@@ -41,148 +41,31 @@ fn test_standalone_exec_cli_can_use_apply_patch() -> anyhow::Result<()> {
 }
 
 #[cfg(not(target_os = "windows"))]
-#[tokio::test]
+#[tokio::test(flavor = "multi_thread", worker_threads = 4)]
 async fn test_apply_patch_tool() -> anyhow::Result<()> {
-    use core_test_support::load_sse_fixture_with_id_from_str;
-    use tempfile::TempDir;
-    use wiremock::Mock;
-    use wiremock::MockServer;
-    use wiremock::ResponseTemplate;
-    use wiremock::matchers::method;
-    use wiremock::matchers::path;
+    use crate::suite::common::run_e2e_exec_test;
+    use codex_core::spawn::CODEX_SANDBOX_NETWORK_DISABLED_ENV_VAR;
 
-    const SSE_TOOL_CALL_ADD: &str = r#"[
-  {
-    "type": "response.output_item.done",
-    "item": {
-      "type": "function_call",
-      "name": "apply_patch",
-      "arguments": "{\n  \"input\": \"*** Begin Patch\\n*** Add File: test.md\\n+Hello world\\n*** End Patch\"\n}",
-      "call_id": "__ID__"
-    }
-  },
-  {
-    "type": "response.completed",
-    "response": {
-      "id": "__ID__",
-      "usage": {
-        "input_tokens": 0,
-        "input_tokens_details": null,
-        "output_tokens": 0,
-        "output_tokens_details": null,
-        "total_tokens": 0
-      },
-      "output": []
-    }
-  }
-]"#;
-
-    const SSE_TOOL_CALL_UPDATE: &str = r#"[
-  {
-    "type": "response.output_item.done",
-    "item": {
-      "type": "function_call",
-      "name": "apply_patch",
-      "arguments": "{\n  \"input\": \"*** Begin Patch\\n*** Update File: test.md\\n@@\\n-Hello world\\n+Final text\\n*** End Patch\"\n}",
-      "call_id": "__ID__"
-    }
-  },
-  {
-    "type": "response.completed",
-    "response": {
-      "id": "__ID__",
-      "usage": {
-        "input_tokens": 0,
-        "input_tokens_details": null,
-        "output_tokens": 0,
-        "output_tokens_details": null,
-        "total_tokens": 0
-      },
-      "output": []
-    }
-  }
-]"#;
-
-    const SSE_TOOL_CALL_COMPLETED: &str = r#"[
-  {
-    "type": "response.completed",
-    "response": {
-      "id": "__ID__",
-      "usage": {
-        "input_tokens": 0,
-        "input_tokens_details": null,
-        "output_tokens": 0,
-        "output_tokens_details": null,
-        "total_tokens": 0
-      },
-      "output": []
-    }
-  }
-]"#;
-
-    // Start a mock model server
-    let server = MockServer::start().await;
-
-    // First response: model calls apply_patch to create test.md
-    let first = ResponseTemplate::new(200)
-        .insert_header("content-type", "text/event-stream")
-        .set_body_raw(
-            load_sse_fixture_with_id_from_str(SSE_TOOL_CALL_ADD, "call1"),
-            "text/event-stream",
-        );
-
-    Mock::given(method("POST"))
-        // .and(path("/v1/responses"))
-        .respond_with(first)
-        .up_to_n_times(1)
-        .mount(&server)
-        .await;
-
-    // Second response: model calls apply_patch to update test.md
-    let second = ResponseTemplate::new(200)
-        .insert_header("content-type", "text/event-stream")
-        .set_body_raw(
-            load_sse_fixture_with_id_from_str(SSE_TOOL_CALL_UPDATE, "call2"),
-            "text/event-stream",
+    if std::env::var(CODEX_SANDBOX_NETWORK_DISABLED_ENV_VAR).is_ok() {
+        println!(
+            "Skipping test because it cannot execute when network is disabled in a Codex sandbox."
         );
+        return Ok(());
+    }
 
-    Mock::given(method("POST"))
-        .and(path("/v1/responses"))
-        .respond_with(second)
-        .up_to_n_times(1)
-        .mount(&server)
-        .await;
-
-    let final_completed = ResponseTemplate::new(200)
-        .insert_header("content-type", "text/event-stream")
-        .set_body_raw(
-            load_sse_fixture_with_id_from_str(SSE_TOOL_CALL_COMPLETED, "resp3"),
-            "text/event-stream",
-        );
-
-    Mock::given(method("POST"))
-        .and(path("/v1/responses"))
-        .respond_with(final_completed)
-        .expect(1)
-        .mount(&server)
-        .await;
-
-    let tmp_cwd = TempDir::new().unwrap();
-    Command::cargo_bin("codex-exec")
-        .context("should find binary for codex-exec")?
-        .current_dir(tmp_cwd.path())
-        .env("CODEX_HOME", tmp_cwd.path())
-        .env("OPENAI_API_KEY", "dummy")
-        .env("OPENAI_BASE_URL", format!("{}/v1", server.uri()))
-        .arg("--skip-git-repo-check")
-        .arg("-s")
-        .arg("workspace-write")
-        .arg("foo")
-        .assert()
-        .success();
-
-    // Verify final file contents
-    let final_path = tmp_cwd.path().join("test.md");
+    let tmp_cwd = tempdir().expect("failed to create temp dir");
+    let tmp_path = tmp_cwd.path().to_path_buf();
+    run_e2e_exec_test(
+        tmp_cwd.path(),
+        vec![
+            include_str!("../fixtures/sse_apply_patch_add.json").to_string(),
+            include_str!("../fixtures/sse_apply_patch_update.json").to_string(),
+            include_str!("../fixtures/sse_response_completed.json").to_string(),
+        ],
+    )
+    .await;
+
+    let final_path = tmp_path.join("test.md");
     let contents = std::fs::read_to_string(&final_path)
         .unwrap_or_else(|e| panic!("failed reading {}: {e}", final_path.display()));
     assert_eq!(contents, "Final text\n");
@@ -190,150 +73,36 @@ async fn test_apply_patch_tool() -> anyhow::Result<()> {
 }
 
 #[cfg(not(target_os = "windows"))]
-#[tokio::test]
+#[tokio::test(flavor = "multi_thread", worker_threads = 4)]
 async fn test_apply_patch_freeform_tool() -> anyhow::Result<()> {
-    use core_test_support::load_sse_fixture_with_id_from_str;
-    use tempfile::TempDir;
-    use wiremock::Mock;
-    use wiremock::MockServer;
-    use wiremock::ResponseTemplate;
-    use wiremock::matchers::method;
-    use wiremock::matchers::path;
+    use crate::suite::common::run_e2e_exec_test;
+    use codex_core::spawn::CODEX_SANDBOX_NETWORK_DISABLED_ENV_VAR;
 
-    const SSE_TOOL_CALL_ADD: &str = r#"[
-  {
-    "type": "response.output_item.done",
-    "item": {
-      "type": "custom_tool_call",
-      "name": "apply_patch",
-      "input": "*** Begin Patch\n*** Add File: test.md\n+Hello world\n*** End Patch",
-      "call_id": "__ID__"
-    }
-  },
-  {
-    "type": "response.completed",
-    "response": {
-      "id": "__ID__",
-      "usage": {
-        "input_tokens": 0,
-        "input_tokens_details": null,
-        "output_tokens": 0,
-        "output_tokens_details": null,
-        "total_tokens": 0
-      },
-      "output": []
-    }
-  }
-]"#;
-
-    const SSE_TOOL_CALL_UPDATE: &str = r#"[
-  {
-    "type": "response.output_item.done",
-    "item": {
-      "type": "custom_tool_call",
-      "name": "apply_patch",
-      "input": "*** Begin Patch\n*** Update File: test.md\n@@\n-Hello world\n+Final text\n*** End Patch",
-      "call_id": "__ID__"
-    }
-  },
-  {
-    "type": "response.completed",
-    "response": {
-      "id": "__ID__",
-      "usage": {
-        "input_tokens": 0,
-        "input_tokens_details": null,
-        "output_tokens": 0,
-        "output_tokens_details": null,
-        "total_tokens": 0
-      },
-      "output": []
-    }
-  }
-]"#;
-
-    const SSE_TOOL_CALL_COMPLETED: &str = r#"[
-  {
-    "type": "response.completed",
-    "response": {
-      "id": "__ID__",
-      "usage": {
-        "input_tokens": 0,
-        "input_tokens_details": null,
-        "output_tokens": 0,
-        "output_tokens_details": null,
-        "total_tokens": 0
-      },
-      "output": []
-    }
-  }
-]"#;
-
-    // Start a mock model server
-    let server = MockServer::start().await;
-
-    // First response: model calls apply_patch to create test.md
-    let first = ResponseTemplate::new(200)
-        .insert_header("content-type", "text/event-stream")
-        .set_body_raw(
-            load_sse_fixture_with_id_from_str(SSE_TOOL_CALL_ADD, "call1"),
-            "text/event-stream",
-        );
-
-    Mock::given(method("POST"))
-        // .and(path("/v1/responses"))
-        .respond_with(first)
-        .up_to_n_times(1)
-        .mount(&server)
-        .await;
-
-    // Second response: model calls apply_patch to update test.md
-    let second = ResponseTemplate::new(200)
-        .insert_header("content-type", "text/event-stream")
-        .set_body_raw(
-            load_sse_fixture_with_id_from_str(SSE_TOOL_CALL_UPDATE, "call2"),
-            "text/event-stream",
-        );
-
-    Mock::given(method("POST"))
-        .and(path("/v1/responses"))
-        .respond_with(second)
-        .up_to_n_times(1)
-        .mount(&server)
-        .await;
-
-    let final_completed = ResponseTemplate::new(200)
-        .insert_header("content-type", "text/event-stream")
-        .set_body_raw(
-            load_sse_fixture_with_id_from_str(SSE_TOOL_CALL_COMPLETED, "resp3"),
-            "text/event-stream",
+    if std::env::var(CODEX_SANDBOX_NETWORK_DISABLED_ENV_VAR).is_ok() {
+        println!(
+            "Skipping test because it cannot execute when network is disabled in a Codex sandbox."
         );
+        return Ok(());
+    }
 
-    Mock::given(method("POST"))
-        // .and(path("/v1/responses"))
-        .respond_with(final_completed)
-        .expect(1)
-        .mount(&server)
-        .await;
-
-    let tmp_cwd = TempDir::new().unwrap();
-    Command::cargo_bin("codex-exec")
-        .context("should find binary for codex-exec")?
-        .current_dir(tmp_cwd.path())
-        .env("CODEX_HOME", tmp_cwd.path())
-        .env("OPENAI_API_KEY", "dummy")
-        .env("OPENAI_BASE_URL", format!("{}/v1", server.uri()))
-        .arg("--skip-git-repo-check")
-        .arg("-s")
-        .arg("workspace-write")
-        .arg("foo")
-        .assert()
-        .success();
+    let tmp_cwd = tempdir().expect("failed to create temp dir");
+    run_e2e_exec_test(
+        tmp_cwd.path(),
+        vec![
+            include_str!("../fixtures/sse_apply_patch_freeform_add.json").to_string(),
+            include_str!("../fixtures/sse_apply_patch_freeform_update.json").to_string(),
+            include_str!("../fixtures/sse_response_completed.json").to_string(),
+        ],
+    )
+    .await;
 
     // Verify final file contents
-    let final_path = tmp_cwd.path().join("test.md");
+    let final_path = tmp_cwd.path().join("app.py");
     let contents = std::fs::read_to_string(&final_path)
         .unwrap_or_else(|e| panic!("failed reading {}: {e}", final_path.display()));
-    assert_eq!(contents, "Final text\n");
+    assert_eq!(
+        contents,
+        include_str!("../fixtures/apply_patch_freeform_final.txt")
+    );
     Ok(())
 }
diff --git a/codex-rs/exec/tests/suite/common.rs b/codex-rs/exec/tests/suite/common.rs
new file mode 100644
index 0000000000..49747dca05
--- /dev/null
+++ b/codex-rs/exec/tests/suite/common.rs
@@ -0,0 +1,73 @@
+// this file is only used for e2e tests which are currently disabled on windows
+#![cfg(not(target_os = "windows"))]
+#![allow(clippy::expect_used)]
+
+use anyhow::Context;
+use assert_cmd::prelude::*;
+use core_test_support::load_sse_fixture_with_id_from_str;
+use std::path::Path;
+use std::process::Command;
+use std::sync::atomic::AtomicUsize;
+use std::sync::atomic::Ordering;
+use wiremock::Mock;
+use wiremock::MockServer;
+use wiremock::matchers::method;
+use wiremock::matchers::path;
+
+use wiremock::Respond;
+
+struct SeqResponder {
+    num_calls: AtomicUsize,
+    responses: Vec<String>,
+}
+
+impl Respond for SeqResponder {
+    fn respond(&self, _: &wiremock::Request) -> wiremock::ResponseTemplate {
+        let call_num = self.num_calls.fetch_add(1, Ordering::SeqCst);
+        match self.responses.get(call_num) {
+            Some(body) => wiremock::ResponseTemplate::new(200)
+                .insert_header("content-type", "text/event-stream")
+                .set_body_raw(
+                    load_sse_fixture_with_id_from_str(body, &format!("request_{}", call_num)),
+                    "text/event-stream",
+                ),
+            None => panic!("no response for {call_num}"),
+        }
+    }
+}
+
+/// Helper function to run an E2E test of a codex-exec call. Starts a wiremock
+/// server, and returns the response_streams in order for each api call. Runs
+/// the codex-exec command with the wiremock server as the model server.
+pub(crate) async fn run_e2e_exec_test(cwd: &Path, response_streams: Vec<String>) {
+    let server = MockServer::start().await;
+
+    let num_calls = response_streams.len();
+    let seq_responder = SeqResponder {
+        num_calls: AtomicUsize::new(0),
+        responses: response_streams,
+    };
+
+    Mock::given(method("POST"))
+        .and(path("/v1/responses"))
+        .respond_with(seq_responder)
+        .expect(num_calls as u64)
+        .mount(&server)
+        .await;
+
+    let cwd = cwd.to_path_buf();
+    let uri = server.uri();
+    Command::cargo_bin("codex-exec")
+        .context("should find binary for codex-exec")
+        .expect("should find binary for codex-exec")
+        .current_dir(cwd.clone())
+        .env("CODEX_HOME", cwd.clone())
+        .env("OPENAI_API_KEY", "dummy")
+        .env("OPENAI_BASE_URL", format!("{}/v1", uri))
+        .arg("--skip-git-repo-check")
+        .arg("-s")
+        .arg("danger-full-access")
+        .arg("foo")
+        .assert()
+        .success();
+}
diff --git a/codex-rs/exec/tests/suite/mod.rs b/codex-rs/exec/tests/suite/mod.rs
index 8a83474ef9..75b19ee1b2 100644
--- a/codex-rs/exec/tests/suite/mod.rs
+++ b/codex-rs/exec/tests/suite/mod.rs
@@ -1,3 +1,4 @@
 // Aggregates all former standalone integration tests as modules.
 mod apply_patch;
+mod common;
 mod sandbox;

Review Comments

codex-rs/exec/tests/fixtures/sse_apply_patch_add.json

@@ -0,0 +1,25 @@
+[
+  {
+    "type": "response.output_item.done",
+    "item": {
+      "type": "custom_tool_call",
+      "name": "apply_patch",
+      "input": "*** Begin Patch\n*** Add File: test.md\n+Hello world\n*** End Patch",
+      "call_id": "__ID__"
+    }
+  },
+  {
+    "type": "response.completed",
+    "response": {
+      "id": "__ID__",
+      "usage": {
+        "input_tokens": 0,
+        "input_tokens_details": null,
+        "output_tokens": 0,
+        "output_tokens_details": null,
+        "total_tokens": 0
+      },
+      "output": []
+    }
+  }
+]

Will it break the test if these files have newlines at the end (i.e., is a trailing newline significant to how the request is handled)?

It would be nice to have them here if they're safe.