Migrate coverage to shell_command (#7042)

This commit is contained in:
pakrym-oai
2025-11-20 19:44:00 -08:00
committed by GitHub
parent 830ab4ce20
commit 767b66f407
25 changed files with 284 additions and 262 deletions

View File

@@ -24,3 +24,5 @@ tokio = { workspace = true, features = [
] }
uuid = { workspace = true }
wiremock = { workspace = true }
core_test_support = { path = "../../../core/tests/common" }
shlex = { workspace = true }

View File

@@ -9,12 +9,14 @@ pub use auth_fixtures::ChatGptIdTokenClaims;
pub use auth_fixtures::encode_id_token;
pub use auth_fixtures::write_chatgpt_auth;
use codex_app_server_protocol::JSONRPCResponse;
pub use core_test_support::format_with_current_shell;
pub use core_test_support::format_with_current_shell_display;
pub use mcp_process::McpProcess;
pub use mock_model_server::create_mock_chat_completions_server;
pub use mock_model_server::create_mock_chat_completions_server_unchecked;
pub use responses::create_apply_patch_sse_response;
pub use responses::create_final_assistant_message_sse_response;
pub use responses::create_shell_sse_response;
pub use responses::create_shell_command_sse_response;
pub use rollout::create_fake_rollout;
use serde::de::DeserializeOwned;

View File

@@ -1,17 +1,18 @@
use serde_json::json;
use std::path::Path;
pub fn create_shell_sse_response(
pub fn create_shell_command_sse_response(
command: Vec<String>,
workdir: Option<&Path>,
timeout_ms: Option<u64>,
call_id: &str,
) -> anyhow::Result<String> {
// The `arguments`` for the `shell` tool is a serialized JSON object.
// The `arguments` for the `shell_command` tool is a serialized JSON object.
let command_str = shlex::try_join(command.iter().map(String::as_str))?;
let tool_call_arguments = serde_json::to_string(&json!({
"command": command,
"command": command_str,
"workdir": workdir.map(|w| w.to_string_lossy()),
"timeout": timeout_ms
"timeout_ms": timeout_ms
}))?;
let tool_call = json!({
"choices": [
@@ -21,7 +22,7 @@ pub fn create_shell_sse_response(
{
"id": call_id,
"function": {
"name": "shell",
"name": "shell_command",
"arguments": tool_call_arguments
}
}
@@ -62,10 +63,10 @@ pub fn create_apply_patch_sse_response(
patch_content: &str,
call_id: &str,
) -> anyhow::Result<String> {
// Use shell command to call apply_patch with heredoc format
let shell_command = format!("apply_patch <<'EOF'\n{patch_content}\nEOF");
// Use shell_command to call apply_patch with heredoc format
let command = format!("apply_patch <<'EOF'\n{patch_content}\nEOF");
let tool_call_arguments = serde_json::to_string(&json!({
"command": ["bash", "-lc", shell_command]
"command": command
}))?;
let tool_call = json!({
@@ -76,7 +77,7 @@ pub fn create_apply_patch_sse_response(
{
"id": call_id,
"function": {
"name": "shell",
"name": "shell_command",
"arguments": tool_call_arguments
}
}

View File

@@ -2,7 +2,8 @@ use anyhow::Result;
use app_test_support::McpProcess;
use app_test_support::create_final_assistant_message_sse_response;
use app_test_support::create_mock_chat_completions_server;
use app_test_support::create_shell_sse_response;
use app_test_support::create_shell_command_sse_response;
use app_test_support::format_with_current_shell;
use app_test_support::to_response;
use codex_app_server_protocol::AddConversationListenerParams;
use codex_app_server_protocol::AddConversationSubscriptionResponse;
@@ -56,7 +57,7 @@ async fn test_codex_jsonrpc_conversation_flow() -> Result<()> {
// Create a mock model server that immediately ends each turn.
// Two turns are expected: initial session configure + one user message.
let responses = vec![
create_shell_sse_response(
create_shell_command_sse_response(
vec!["ls".to_string()],
Some(&working_directory),
Some(5000),
@@ -175,7 +176,7 @@ async fn test_send_user_turn_changes_approval_policy_behavior() -> Result<()> {
// Mock server will request a python shell call for the first and second turn, then finish.
let responses = vec![
create_shell_sse_response(
create_shell_command_sse_response(
vec![
"python3".to_string(),
"-c".to_string(),
@@ -186,7 +187,7 @@ async fn test_send_user_turn_changes_approval_policy_behavior() -> Result<()> {
"call1",
)?,
create_final_assistant_message_sse_response("done 1")?,
create_shell_sse_response(
create_shell_command_sse_response(
vec![
"python3".to_string(),
"-c".to_string(),
@@ -267,11 +268,7 @@ async fn test_send_user_turn_changes_approval_policy_behavior() -> Result<()> {
ExecCommandApprovalParams {
conversation_id,
call_id: "call1".to_string(),
command: vec![
"python3".to_string(),
"-c".to_string(),
"print(42)".to_string(),
],
command: format_with_current_shell("python3 -c 'print(42)'"),
cwd: working_directory.clone(),
reason: None,
risk: None,
@@ -353,23 +350,15 @@ async fn test_send_user_turn_updates_sandbox_and_cwd_between_turns() -> Result<(
std::fs::create_dir(&second_cwd)?;
let responses = vec![
create_shell_sse_response(
vec![
"bash".to_string(),
"-lc".to_string(),
"echo first turn".to_string(),
],
create_shell_command_sse_response(
vec!["echo".to_string(), "first".to_string(), "turn".to_string()],
None,
Some(5000),
"call-first",
)?,
create_final_assistant_message_sse_response("done first")?,
create_shell_sse_response(
vec![
"bash".to_string(),
"-lc".to_string(),
"echo second turn".to_string(),
],
create_shell_command_sse_response(
vec!["echo".to_string(), "second".to_string(), "turn".to_string()],
None,
Some(5000),
"call-second",
@@ -481,13 +470,9 @@ async fn test_send_user_turn_updates_sandbox_and_cwd_between_turns() -> Result<(
exec_begin.cwd, second_cwd,
"exec turn should run from updated cwd"
);
let expected_command = format_with_current_shell("echo second turn");
assert_eq!(
exec_begin.command,
vec![
"bash".to_string(),
"-lc".to_string(),
"echo second turn".to_string()
],
exec_begin.command, expected_command,
"exec turn should run expected command"
);

View File

@@ -19,7 +19,7 @@ use tokio::time::timeout;
use app_test_support::McpProcess;
use app_test_support::create_mock_chat_completions_server;
use app_test_support::create_shell_sse_response;
use app_test_support::create_shell_command_sse_response;
use app_test_support::to_response;
const DEFAULT_READ_TIMEOUT: std::time::Duration = std::time::Duration::from_secs(10);
@@ -56,7 +56,7 @@ async fn shell_command_interruption() -> anyhow::Result<()> {
std::fs::create_dir(&working_directory)?;
// Create mock server with a single SSE response: the long sleep command
let server = create_mock_chat_completions_server(vec![create_shell_sse_response(
let server = create_mock_chat_completions_server(vec![create_shell_command_sse_response(
shell_command.clone(),
Some(&working_directory),
Some(10_000), // 10 seconds timeout in ms

View File

@@ -3,7 +3,7 @@
use anyhow::Result;
use app_test_support::McpProcess;
use app_test_support::create_mock_chat_completions_server;
use app_test_support::create_shell_sse_response;
use app_test_support::create_shell_command_sse_response;
use app_test_support::to_response;
use codex_app_server_protocol::JSONRPCNotification;
use codex_app_server_protocol::JSONRPCResponse;
@@ -41,7 +41,7 @@ async fn turn_interrupt_aborts_running_turn() -> Result<()> {
std::fs::create_dir(&working_directory)?;
// Mock server: long-running shell command then (after abort) nothing else needed.
let server = create_mock_chat_completions_server(vec![create_shell_sse_response(
let server = create_mock_chat_completions_server(vec![create_shell_command_sse_response(
shell_command.clone(),
Some(&working_directory),
Some(10_000),

View File

@@ -4,7 +4,8 @@ use app_test_support::create_apply_patch_sse_response;
use app_test_support::create_final_assistant_message_sse_response;
use app_test_support::create_mock_chat_completions_server;
use app_test_support::create_mock_chat_completions_server_unchecked;
use app_test_support::create_shell_sse_response;
use app_test_support::create_shell_command_sse_response;
use app_test_support::format_with_current_shell_display;
use app_test_support::to_response;
use codex_app_server_protocol::ApprovalDecision;
use codex_app_server_protocol::CommandExecutionStatus;
@@ -203,7 +204,7 @@ async fn turn_start_exec_approval_toggle_v2() -> Result<()> {
// Mock server: first turn requests a shell call (elicitation), then completes.
// Second turn same, but we'll set approval_policy=never to avoid elicitation.
let responses = vec![
create_shell_sse_response(
create_shell_command_sse_response(
vec![
"python3".to_string(),
"-c".to_string(),
@@ -214,7 +215,7 @@ async fn turn_start_exec_approval_toggle_v2() -> Result<()> {
"call1",
)?,
create_final_assistant_message_sse_response("done 1")?,
create_shell_sse_response(
create_shell_command_sse_response(
vec![
"python3".to_string(),
"-c".to_string(),
@@ -343,23 +344,15 @@ async fn turn_start_updates_sandbox_and_cwd_between_turns_v2() -> Result<()> {
std::fs::create_dir(&second_cwd)?;
let responses = vec![
create_shell_sse_response(
vec![
"bash".to_string(),
"-lc".to_string(),
"echo first turn".to_string(),
],
create_shell_command_sse_response(
vec!["echo".to_string(), "first".to_string(), "turn".to_string()],
None,
Some(5000),
"call-first",
)?,
create_final_assistant_message_sse_response("done first")?,
create_shell_sse_response(
vec![
"bash".to_string(),
"-lc".to_string(),
"echo second turn".to_string(),
],
create_shell_command_sse_response(
vec!["echo".to_string(), "second".to_string(), "turn".to_string()],
None,
Some(5000),
"call-second",
@@ -465,7 +458,8 @@ async fn turn_start_updates_sandbox_and_cwd_between_turns_v2() -> Result<()> {
unreachable!("loop ensures we break on command execution items");
};
assert_eq!(cwd, second_cwd);
assert_eq!(command, "bash -lc 'echo second turn'");
let expected_command = format_with_current_shell_display("echo second turn");
assert_eq!(command, expected_command);
assert_eq!(status, CommandExecutionStatus::InProgress);
timeout(
@@ -480,6 +474,10 @@ async fn turn_start_updates_sandbox_and_cwd_between_turns_v2() -> Result<()> {
#[tokio::test]
async fn turn_start_file_change_approval_v2() -> Result<()> {
skip_if_no_network!(Ok(()));
if cfg!(windows) {
// TODO apply_patch approvals are not parsed from powershell commands yet
return Ok(());
}
let tmp = TempDir::new()?;
let codex_home = tmp.path().join("codex_home");
@@ -626,6 +624,10 @@ async fn turn_start_file_change_approval_v2() -> Result<()> {
#[tokio::test]
async fn turn_start_file_change_approval_decline_v2() -> Result<()> {
skip_if_no_network!(Ok(()));
if cfg!(windows) {
// TODO apply_patch approvals are not parsed from powershell commands yet
return Ok(());
}
let tmp = TempDir::new()?;
let codex_home = tmp.path().join("codex_home");