Compare commits

...

34 Commits

Author SHA1 Message Date
Ahmed Ibrahim
67d91e3cdb tests(app-server): send the correct approval response type
Co-authored-by: Codex <noreply@openai.com>
2026-03-13 22:02:11 +00:00
Ahmed Ibrahim
00e55c001f tests(app-server): collapse approval probe conditionals
Co-authored-by: Codex <noreply@openai.com>
2026-03-13 22:00:29 +00:00
Ahmed Ibrahim
1391806c97 tests(app-server): compare review approvals to started items
Co-authored-by: Codex <noreply@openai.com>
2026-03-13 21:52:31 +00:00
Ahmed Ibrahim
28ddf177b0 tests(app-server): await completed review command item
Co-authored-by: Codex <noreply@openai.com>
2026-03-13 21:41:21 +00:00
Ahmed Ibrahim
a8596575b2 tests: harden review and pending-input probes
Co-authored-by: Codex <noreply@openai.com>
2026-03-13 21:28:20 +00:00
Ahmed Ibrahim
05525668c9 tests(app-server): fix review thread-read borrow
Co-authored-by: Codex <noreply@openai.com>
2026-03-13 21:18:25 +00:00
Ahmed Ibrahim
f485ff2553 tests: stabilize review and pending input probes
Co-authored-by: Codex <noreply@openai.com>
2026-03-13 21:05:42 +00:00
Ahmed Ibrahim
630a08351f merge main into flaky-fix branch
Resolve the code_mode test conflict by keeping the new upstream coverage and leaving the parallel test enabled.

Co-authored-by: Codex <noreply@openai.com>
2026-03-13 20:46:17 +00:00
Ahmed Ibrahim
18479ba8d5 tests(core): wait for streamed follow-up completion
Co-authored-by: Codex <noreply@openai.com>
2026-03-13 20:39:49 +00:00
Ahmed Ibrahim
da86411a23 tests(app-server): match review approvals to command item ids
Co-authored-by: Codex <noreply@openai.com>
2026-03-13 20:30:52 +00:00
Ahmed Ibrahim
c9c78646a8 tests(app-server): accept review approval races
Co-authored-by: Codex <noreply@openai.com>
2026-03-13 20:16:42 +00:00
Ahmed Ibrahim
bd7f028745 tests(app-server): handle fast review command events
Co-authored-by: Codex <noreply@openai.com>
2026-03-13 19:52:15 +00:00
Ahmed Ibrahim
803c6f2c46 tests(app-server): stabilize review approval ordering
Co-authored-by: Codex <noreply@openai.com>
2026-03-13 19:31:30 +00:00
Ahmed Ibrahim
f8312c98e9 tests(app-server): stabilize zsh fork decline flow
Co-authored-by: Codex <noreply@openai.com>
2026-03-13 19:13:17 +00:00
Ahmed Ibrahim
4c5bf81977 tests(app-server): buffer review approval ordering
Co-authored-by: Codex <noreply@openai.com>
2026-03-13 18:52:30 +00:00
Ahmed Ibrahim
48abeed843 tests(core): restore approval write budget
Co-authored-by: Codex <noreply@openai.com>
2026-03-13 18:29:31 +00:00
Ahmed Ibrahim
f280dd95ba tests(app-server): approve review execs before waiting
Co-authored-by: Codex <noreply@openai.com>
2026-03-13 18:08:43 +00:00
Ahmed Ibrahim
a10d6039e2 fix(core): preserve absolute workdir paths
Co-authored-by: Codex <noreply@openai.com>
2026-03-13 17:34:44 +00:00
Ahmed Ibrahim
1c084a71af tests(core): remove extra shell spawn from approval writes
Co-authored-by: Codex <noreply@openai.com>
2026-03-13 17:09:19 +00:00
Ahmed Ibrahim
cb046f2927 tests(core): re-enable unified exec workdir override
Co-authored-by: Codex <noreply@openai.com>
2026-03-13 17:07:26 +00:00
Ahmed Ibrahim
0c0afda6e7 tests(tui): make malformed rules startup deterministic
Co-authored-by: Codex <noreply@openai.com>
2026-03-13 16:59:16 +00:00
Ahmed Ibrahim
e5185e35c3 tests(tui): use public thread manager helper
Co-authored-by: Codex <noreply@openai.com>
2026-03-13 16:51:23 +00:00
Ahmed Ibrahim
b67f16683f tests(tui): escape malformed rules trust path
Co-authored-by: Codex <noreply@openai.com>
2026-03-13 16:37:43 +00:00
Ahmed Ibrahim
6ea03f988f tests(tui): replace malformed rules PTY regression
Co-authored-by: Codex <noreply@openai.com>
2026-03-13 16:34:56 +00:00
Ahmed Ibrahim
9e876176e8 tests(tui): stabilize malformed rules startup regression
Co-authored-by: Codex <noreply@openai.com>
2026-03-13 16:13:45 +00:00
Ahmed Ibrahim
4878051af3 tests(app-server): match review approvals by method
Co-authored-by: Codex <noreply@openai.com>
2026-03-13 15:58:12 +00:00
Ahmed Ibrahim
4ecb7577be tests(tui): re-enable malformed rules startup regression
Co-authored-by: Codex <noreply@openai.com>
2026-03-13 08:36:54 +00:00
Ahmed Ibrahim
ac4403e5a3 tests(core): re-enable pending input follow-up coverage
Co-authored-by: Codex <noreply@openai.com>
2026-03-13 08:30:26 +00:00
Ahmed Ibrahim
5a82b98407 tests(app-server): stabilize review approval item ids
Co-authored-by: Codex <noreply@openai.com>
2026-03-13 08:28:20 +00:00
Ahmed Ibrahim
269c46ea82 tests(app-server): re-enable detached review on windows
Co-authored-by: Codex <noreply@openai.com>
2026-03-13 08:25:35 +00:00
Ahmed Ibrahim
53fcef595b tests(core): restore approval write timeout budget
Co-authored-by: Codex <noreply@openai.com>
2026-03-13 08:12:54 +00:00
Ahmed Ibrahim
eb16217297 tests(core): unignore code mode parallel test on windows
Co-authored-by: Codex <noreply@openai.com>
2026-03-13 07:52:24 +00:00
Ahmed Ibrahim
d4c5fd245e tests(core): remove login startup from approval matrix
Co-authored-by: Codex <noreply@openai.com>
2026-03-13 07:47:09 +00:00
Ahmed Ibrahim
3c0a79f77e tests(core): avoid login-shell startup in approval writes
Co-authored-by: Codex <noreply@openai.com>
2026-03-13 07:17:06 +00:00
13 changed files with 418 additions and 247 deletions

View File

@@ -918,6 +918,27 @@ impl McpProcess {
.with_context(|| "failed to deserialize ServerRequest from JSONRPCRequest")
}
pub async fn read_stream_until_request_method(
&mut self,
method: &str,
) -> anyhow::Result<JSONRPCRequest> {
eprintln!("in read_stream_until_request_method({method})");
let message = self
.read_stream_until_message(|message| {
matches!(
message,
JSONRPCMessage::Request(request) if request.method == method
)
})
.await?;
let JSONRPCMessage::Request(request) = message else {
unreachable!("expected JSONRPCMessage::Request, got {message:?}");
};
Ok(request)
}
pub async fn read_stream_until_response_message(
&mut self,
request_id: RequestId,

View File

@@ -5,6 +5,9 @@ use app_test_support::create_mock_responses_server_repeating_assistant;
use app_test_support::create_mock_responses_server_sequence;
use app_test_support::create_shell_command_sse_response;
use app_test_support::to_response;
use codex_app_server_protocol::CommandExecutionApprovalDecision;
use codex_app_server_protocol::CommandExecutionRequestApprovalParams;
use codex_app_server_protocol::CommandExecutionRequestApprovalResponse;
use codex_app_server_protocol::ItemCompletedNotification;
use codex_app_server_protocol::ItemStartedNotification;
use codex_app_server_protocol::JSONRPCError;
@@ -16,12 +19,12 @@ use codex_app_server_protocol::ReviewDelivery;
use codex_app_server_protocol::ReviewStartParams;
use codex_app_server_protocol::ReviewStartResponse;
use codex_app_server_protocol::ReviewTarget;
use codex_app_server_protocol::ServerRequest;
use codex_app_server_protocol::ThreadItem;
use codex_app_server_protocol::ThreadStartParams;
use codex_app_server_protocol::ThreadStartResponse;
use codex_app_server_protocol::ThreadStartedNotification;
use codex_app_server_protocol::ThreadStatusChangedNotification;
use codex_app_server_protocol::TurnCompletedNotification;
use codex_app_server_protocol::TurnStartParams;
use codex_app_server_protocol::TurnStatus;
use codex_app_server_protocol::UserInput as V2UserInput;
@@ -140,7 +143,6 @@ async fn review_start_runs_review_turn_and_emits_code_review_item() -> Result<()
}
#[tokio::test]
#[ignore = "TODO(owenlin0): flaky"]
async fn review_start_exec_approval_item_id_matches_command_execution_item() -> Result<()> {
let responses = vec![
create_shell_command_sse_response(
@@ -167,7 +169,7 @@ async fn review_start_exec_approval_item_id_matches_command_execution_item() ->
let review_req = mcp
.send_review_start_request(ReviewStartParams {
thread_id,
thread_id: thread_id.clone(),
delivery: Some(ReviewDelivery::Inline),
target: ReviewTarget::Commit {
sha: "1234567deadbeef".to_string(),
@@ -182,45 +184,79 @@ async fn review_start_exec_approval_item_id_matches_command_execution_item() ->
.await??;
let ReviewStartResponse { turn, .. } = to_response::<ReviewStartResponse>(review_resp)?;
let turn_id = turn.id.clone();
let mut started_command_execution_item_id: Option<String> = None;
let mut approval_item_id: Option<String> = None;
let server_req = timeout(
DEFAULT_READ_TIMEOUT,
mcp.read_stream_until_request_message(),
)
.await??;
let ServerRequest::CommandExecutionRequestApproval { request_id, params } = server_req else {
panic!("expected CommandExecutionRequestApproval request");
};
assert_eq!(params.item_id, "review-call-1");
assert_eq!(params.turn_id, turn_id);
let mut command_item_id = None;
for _ in 0..10 {
let item_started: JSONRPCNotification = timeout(
DEFAULT_READ_TIMEOUT,
mcp.read_stream_until_notification_message("item/started"),
)
.await??;
let started: ItemStartedNotification =
serde_json::from_value(item_started.params.expect("params must be present"))?;
if let ThreadItem::CommandExecution { id, .. } = started.item {
command_item_id = Some(id);
break;
let deadline = tokio::time::Instant::now() + DEFAULT_READ_TIMEOUT;
while started_command_execution_item_id.is_none() || approval_item_id.is_none() {
let remaining = deadline.saturating_duration_since(tokio::time::Instant::now());
let message = timeout(remaining, mcp.read_next_message()).await??;
match message {
JSONRPCMessage::Notification(notification) if notification.method == "item/started" => {
let started: ItemStartedNotification =
serde_json::from_value(notification.params.expect("params must be present"))?;
if started.turn_id == turn_id
&& let ThreadItem::CommandExecution { id, .. } = started.item
{
eprintln!(
"review approval probe saw started command execution item: turn_id={turn_id}, item_id={item_id}",
turn_id = started.turn_id,
item_id = id
);
started_command_execution_item_id = Some(id);
}
}
JSONRPCMessage::Request(request)
if request.method == "item/commandExecution/requestApproval" =>
{
eprintln!(
"review approval probe saw request method: {method}",
method = request.method
);
let params: CommandExecutionRequestApprovalParams =
serde_json::from_value(request.params.expect("params must be present"))?;
eprintln!(
"review approval probe matched approval request: turn_id={turn_id}, item_id={item_id}",
turn_id = params.turn_id,
item_id = params.item_id
);
assert_eq!(params.item_id, "review-call-1");
assert_eq!(params.turn_id, turn_id);
approval_item_id = Some(params.item_id);
mcp.send_response(
request.id,
serde_json::to_value(CommandExecutionRequestApprovalResponse {
decision: CommandExecutionApprovalDecision::Accept,
})?,
)
.await?;
}
_ => {}
}
}
let command_item_id = command_item_id.expect("did not observe command execution item");
assert_eq!(command_item_id, params.item_id);
mcp.send_response(
request_id,
serde_json::json!({ "decision": codex_protocol::protocol::ReviewDecision::Approved }),
)
.await?;
timeout(
DEFAULT_READ_TIMEOUT,
mcp.read_stream_until_notification_message("turn/completed"),
)
.await??;
let started_command_execution_item_id =
started_command_execution_item_id.expect("did not observe started command execution item");
let approval_item_id =
approval_item_id.expect("did not observe command execution approval request");
assert_eq!(started_command_execution_item_id, approval_item_id);
loop {
let remaining = deadline.saturating_duration_since(tokio::time::Instant::now());
let message = timeout(remaining, mcp.read_next_message()).await??;
match message {
JSONRPCMessage::Notification(notification)
if notification.method == "turn/completed" =>
{
let completed: TurnCompletedNotification =
serde_json::from_value(notification.params.expect("params must be present"))?;
if completed.turn.id == turn_id {
break;
}
}
_ => {}
}
}
Ok(())
}
@@ -259,7 +295,6 @@ async fn review_start_rejects_empty_base_branch() -> Result<()> {
Ok(())
}
#[cfg_attr(target_os = "windows", ignore = "flaky on windows CI")]
#[tokio::test]
async fn review_start_with_detached_delivery_returns_new_thread_id() -> Result<()> {
let review_payload = json!({

View File

@@ -15,10 +15,12 @@ use app_test_support::create_shell_command_sse_response;
use app_test_support::to_response;
use codex_app_server_protocol::CommandAction;
use codex_app_server_protocol::CommandExecutionApprovalDecision;
use codex_app_server_protocol::CommandExecutionRequestApprovalParams;
use codex_app_server_protocol::CommandExecutionRequestApprovalResponse;
use codex_app_server_protocol::CommandExecutionStatus;
use codex_app_server_protocol::ItemCompletedNotification;
use codex_app_server_protocol::ItemStartedNotification;
use codex_app_server_protocol::JSONRPCMessage;
use codex_app_server_protocol::JSONRPCResponse;
use codex_app_server_protocol::RequestId;
use codex_app_server_protocol::ServerRequest;
@@ -565,16 +567,14 @@ async fn turn_start_shell_zsh_fork_subcommand_decline_marks_parent_declined_v2()
let first_file_str = first_file.to_string_lossy().into_owned();
let second_file_str = second_file.to_string_lossy().into_owned();
let parent_shell_hint = format!("&& {}", &first_file_str);
while target_decision_index < target_decisions.len() || !saw_parent_approval {
let server_req = timeout(
while target_decision_index < target_decisions.len() {
let approval_request = timeout(
DEFAULT_READ_TIMEOUT,
mcp.read_stream_until_request_message(),
mcp.read_stream_until_request_method("item/commandExecution/requestApproval"),
)
.await??;
let ServerRequest::CommandExecutionRequestApproval { request_id, params } = server_req
else {
panic!("expected CommandExecutionRequestApproval request");
};
let params: CommandExecutionRequestApprovalParams =
serde_json::from_value(approval_request.params.expect("params must be present"))?;
assert_eq!(params.item_id, "call-zsh-fork-subcommand-decline");
assert_eq!(params.thread_id, thread.id);
let approval_command = params
@@ -624,44 +624,93 @@ async fn turn_start_shell_zsh_fork_subcommand_decline_marks_parent_declined_v2()
// before the parent shell command or target subcommands are reached.
CommandExecutionApprovalDecision::Accept
};
eprintln!(
"zsh subcommand decline approval: target={is_target_subcommand} parent={is_parent_approval} decision={decision:?} command={approval_command}"
);
mcp.send_response(
request_id,
approval_request.id,
serde_json::to_value(CommandExecutionRequestApprovalResponse { decision })?,
)
.await?;
}
assert!(
saw_parent_approval,
"expected parent shell approval request"
);
assert_eq!(approved_subcommand_ids.len(), 2);
assert_ne!(approved_subcommand_ids[0], approved_subcommand_ids[1]);
assert_eq!(approved_subcommand_strings.len(), 2);
assert!(approved_subcommand_strings[0].contains(&first_file.display().to_string()));
assert!(approved_subcommand_strings[1].contains(&second_file.display().to_string()));
let parent_completed_command_execution = timeout(DEFAULT_READ_TIMEOUT, async {
loop {
let completed_notif = mcp
.read_stream_until_notification_message("item/completed")
.await?;
let completed: ItemCompletedNotification = serde_json::from_value(
completed_notif
.params
.clone()
.expect("item/completed params"),
)?;
if let ThreadItem::CommandExecution { id, .. } = &completed.item
&& id == "call-zsh-fork-subcommand-decline"
{
return Ok::<ThreadItem, anyhow::Error>(completed.item);
}
}
})
.await;
enum CompletionEvent {
Parent(ThreadItem),
Turn(TurnCompletedNotification),
}
match parent_completed_command_execution {
Ok(Ok(parent_completed_command_execution)) => {
let completion_event = loop {
let message = timeout(DEFAULT_READ_TIMEOUT, mcp.read_next_message()).await??;
match message {
JSONRPCMessage::Request(request)
if request.method == "item/commandExecution/requestApproval" =>
{
let params: CommandExecutionRequestApprovalParams =
serde_json::from_value(request.params.expect("params must be present"))?;
assert_eq!(params.item_id, "call-zsh-fork-subcommand-decline");
assert_eq!(params.thread_id, thread.id);
let approval_command = params
.command
.as_deref()
.expect("approval command should be present");
let has_first_file = approval_command.contains(&first_file_str);
let has_second_file = approval_command.contains(&second_file_str);
let is_parent_approval = approval_command.contains(&zsh_path.display().to_string())
&& (approval_command.contains(&shell_command)
|| (has_first_file && has_second_file)
|| approval_command.contains(&parent_shell_hint));
if is_parent_approval {
saw_parent_approval = true;
}
eprintln!(
"zsh subcommand decline late approval: parent={is_parent_approval} command={approval_command}"
);
mcp.send_response(
request.id,
serde_json::to_value(CommandExecutionRequestApprovalResponse {
decision: CommandExecutionApprovalDecision::Accept,
})?,
)
.await?;
}
JSONRPCMessage::Request(request) => {
panic!("unexpected request method while awaiting zsh completion: {request:?}");
}
JSONRPCMessage::Notification(notification)
if notification.method == "item/completed" =>
{
let completed: ItemCompletedNotification = serde_json::from_value(
notification.params.clone().expect("item/completed params"),
)?;
if let ThreadItem::CommandExecution { id, .. } = &completed.item
&& id == "call-zsh-fork-subcommand-decline"
{
break CompletionEvent::Parent(completed.item);
}
}
JSONRPCMessage::Notification(notification)
if notification.method == "turn/completed" =>
{
let completed: TurnCompletedNotification = serde_json::from_value(
notification
.params
.expect("turn/completed params must be present"),
)?;
break CompletionEvent::Turn(completed);
}
_ => continue,
}
};
eprintln!("zsh subcommand decline saw parent approval request: {saw_parent_approval}");
match completion_event {
CompletionEvent::Parent(parent_completed_command_execution) => {
let ThreadItem::CommandExecution {
id,
status,
@@ -711,22 +760,11 @@ async fn turn_start_shell_zsh_fork_subcommand_decline_marks_parent_declined_v2()
}
}
}
Ok(Err(error)) => return Err(error),
Err(_) => {
CompletionEvent::Turn(completed) => {
// Some zsh builds abort the turn immediately after the rejected
// subcommand without emitting a parent `item/completed`, and Linux
// sandbox failures can also complete the turn before the parent
// completion item is observed.
let completed_notif = timeout(
DEFAULT_READ_TIMEOUT,
mcp.read_stream_until_notification_message("turn/completed"),
)
.await??;
let completed: TurnCompletedNotification = serde_json::from_value(
completed_notif
.params
.expect("turn/completed params must be present"),
)?;
assert_eq!(completed.thread_id, thread.id);
assert_eq!(completed.turn.id, turn.id);
assert!(matches!(

View File

@@ -922,9 +922,12 @@ impl TurnContext {
}
pub(crate) fn resolve_path(&self, path: Option<String>) -> PathBuf {
path.as_ref()
.map(PathBuf::from)
.map_or_else(|| self.cwd.clone(), |p| self.cwd.join(p))
path.as_ref().map(PathBuf::from).map_or_else(
|| self.cwd.clone(),
|p| {
if p.is_absolute() { p } else { self.cwd.join(p) }
},
)
}
pub(crate) fn compact_prompt(&self) -> &str {

View File

@@ -184,6 +184,27 @@ async fn loads_policies_from_policy_subdirectory() {
);
}
#[tokio::test]
async fn returns_error_when_rules_path_is_a_file() {
let temp_dir = tempdir().expect("create temp dir");
let config_stack = config_stack_for_dot_codex_folder(temp_dir.path());
fs::write(
temp_dir.path().join(RULES_DIR_NAME),
"rules should be a directory not a file",
)
.expect("write malformed rules fixture");
let err = load_exec_policy(&config_stack)
.await
.expect_err("expected malformed rules path to fail");
let rendered = format_exec_policy_error_with_source(&err);
assert!(
rendered.contains("failed to read rules files"),
"expected rules read error, got: {rendered}"
);
}
#[tokio::test]
async fn merges_requirements_exec_policy_network_rules() -> anyhow::Result<()> {
let temp_dir = tempdir()?;

View File

@@ -112,6 +112,39 @@ async fn shell_command_handler_to_exec_params_uses_session_shell_and_turn_contex
assert_eq!(exec_params.arg0, None);
}
#[tokio::test]
async fn shell_command_handler_to_exec_params_preserves_absolute_workdir() {
let (session, turn_context) = make_session_and_context().await;
let absolute_workdir = turn_context.cwd.join("absolute-subdir");
let expected_env = create_env(
&turn_context.shell_environment_policy,
Some(session.conversation_id),
);
let params = ShellCommandToolCallParams {
command: "echo hello".to_string(),
workdir: Some(absolute_workdir.to_string_lossy().to_string()),
login: None,
timeout_ms: Some(250),
sandbox_permissions: Some(SandboxPermissions::UseDefault),
additional_permissions: None,
prefix_rule: None,
justification: None,
};
let exec_params = ShellCommandHandler::to_exec_params(
&params,
&session,
&turn_context,
session.conversation_id,
true,
)
.expect("absolute workdir should be accepted");
assert_eq!(exec_params.cwd, absolute_workdir);
assert_eq!(exec_params.env, expected_env);
}
#[test]
fn shell_command_handler_respects_explicit_login_flag() {
let (_tx, shell_snapshot) = watch::channel(Some(Arc::new(ShellSnapshot {

View File

@@ -122,8 +122,11 @@ impl ActionKind {
ActionKind::WriteFile { target, content } => {
let (path, _) = target.resolve_for_patch(test);
let _ = fs::remove_file(&path);
let command = format!("printf {content:?} > {path:?} && cat {path:?}");
let event = shell_event(call_id, &command, 5_000, sandbox_permissions)?;
// Keep this fixture on shell builtins only so approval-flow timing
// does not depend on launching an extra `cat` subprocess.
let command = format!("printf {content:?} > {path:?} && printf {content:?}");
let event =
approval_matrix_shell_event(call_id, &command, 5_000, sandbox_permissions)?;
Ok((event, Some(command)))
}
ActionKind::FetchUrl {
@@ -145,7 +148,8 @@ impl ActionKind {
);
let command = format!("python3 -c \"{script}\"");
let event = shell_event(call_id, &command, 5_000, sandbox_permissions)?;
let event =
approval_matrix_shell_event(call_id, &command, 5_000, sandbox_permissions)?;
Ok((event, Some(command)))
}
ActionKind::FetchUrlNoProxy {
@@ -167,11 +171,13 @@ impl ActionKind {
);
let command = format!("python3 -c \"{script}\"");
let event = shell_event(call_id, &command, 5_000, sandbox_permissions)?;
let event =
approval_matrix_shell_event(call_id, &command, 5_000, sandbox_permissions)?;
Ok((event, Some(command)))
}
ActionKind::RunCommand { command } => {
let event = shell_event(call_id, command, 1_000, sandbox_permissions)?;
let event =
approval_matrix_shell_event(call_id, command, 1_000, sandbox_permissions)?;
Ok((event, Some(command.to_string())))
}
ActionKind::RunUnifiedExecCommand {
@@ -198,7 +204,8 @@ impl ActionKind {
let _ = fs::remove_file(&path);
let patch = build_add_file_patch(&patch_path, content);
let command = shell_apply_patch_command(&patch);
let event = shell_event(call_id, &command, 5_000, sandbox_permissions)?;
let event =
approval_matrix_shell_event(call_id, &command, 5_000, sandbox_permissions)?;
Ok((event, Some(command)))
}
}
@@ -225,7 +232,31 @@ fn shell_event(
timeout_ms: u64,
sandbox_permissions: SandboxPermissions,
) -> Result<Value> {
shell_event_with_prefix_rule(call_id, command, timeout_ms, sandbox_permissions, None)
shell_event_with_prefix_rule(
call_id,
command,
timeout_ms,
sandbox_permissions,
None,
None,
)
}
fn approval_matrix_shell_event(
call_id: &str,
command: &str,
timeout_ms: u64,
sandbox_permissions: SandboxPermissions,
) -> Result<Value> {
// Approval-matrix shell fixtures verify permission behavior, not login-shell startup.
shell_event_with_prefix_rule(
call_id,
command,
timeout_ms,
sandbox_permissions,
None,
Some(false),
)
}
fn shell_event_with_prefix_rule(
@@ -234,6 +265,7 @@ fn shell_event_with_prefix_rule(
timeout_ms: u64,
sandbox_permissions: SandboxPermissions,
prefix_rule: Option<Vec<String>>,
login: Option<bool>,
) -> Result<Value> {
let mut args = json!({
"command": command,
@@ -245,6 +277,9 @@ fn shell_event_with_prefix_rule(
if let Some(prefix_rule) = prefix_rule {
args["prefix_rule"] = json!(prefix_rule);
}
if let Some(login) = login {
args["login"] = json!(login);
}
let args_str = serde_json::to_string(&args)?;
Ok(ev_function_call(call_id, "shell_command", &args_str))
}
@@ -1649,6 +1684,10 @@ async fn run_scenario(scenario: &ScenarioSpec) -> Result<()> {
let command = expected_command
.as_deref()
.expect("exec approval requires shell command");
eprintln!(
"waiting for exec approval in scenario {}: {command}",
scenario.name
);
let approval = expect_exec_approval(&test, command).await;
if let Some(expected_reason) = expected_reason {
assert_eq!(
@@ -1692,6 +1731,10 @@ async fn run_scenario(scenario: &ScenarioSpec) -> Result<()> {
let output_item = results_mock.single_request().function_call_output(call_id);
let result = parse_result(&output_item);
eprintln!(
"scenario {} finished with exit_code={:?}, stdout={:?}",
scenario.name, result.exit_code, result.stdout
);
scenario.expectation.verify(&test, &result)?;
Ok(())
@@ -2082,6 +2125,7 @@ async fn invalid_requested_prefix_rule_falls_back_for_compound_command() -> Resu
1_000,
SandboxPermissions::RequireEscalated,
Some(vec!["touch".to_string()]),
None,
)?;
let _ = mount_sse_once(
@@ -2133,6 +2177,7 @@ async fn approving_fallback_rule_for_compound_command_works() -> Result<()> {
1_000,
SandboxPermissions::RequireEscalated,
Some(vec!["touch".to_string()]),
None,
)?;
let _ = mount_sse_once(
@@ -2180,6 +2225,7 @@ async fn approving_fallback_rule_for_compound_command_works() -> Result<()> {
1_000,
SandboxPermissions::RequireEscalated,
Some(vec!["touch".to_string()]),
None,
)?;
let _ = mount_sse_once(

View File

@@ -330,7 +330,6 @@ text(output.output);
Ok(())
}
#[cfg_attr(windows, ignore = "flaky on windows")]
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
async fn code_mode_nested_tool_calls_can_run_in_parallel() -> Result<()> {
skip_if_no_network!(Ok(()));

View File

@@ -13,6 +13,8 @@ use core_test_support::wait_for_event;
use pretty_assertions::assert_eq;
use serde_json::Value;
use tokio::sync::oneshot;
use tokio::time::Duration;
use tokio::time::timeout;
fn ev_message_item_done(id: &str, text: &str) -> Value {
serde_json::json!({
@@ -44,8 +46,13 @@ fn message_input_texts(body: &Value, role: &str) -> Vec<String> {
.collect()
}
fn has_user_text(body: &Value, text: &str) -> bool {
message_input_texts(body, "user")
.iter()
.any(|candidate| candidate == text)
}
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
#[ignore = "TODO(aibrahim): flaky"]
async fn injected_user_input_triggers_follow_up_request_with_deltas() {
let (gate_completed_tx, gate_completed_rx) = oneshot::channel();
@@ -87,8 +94,8 @@ async fn injected_user_input_triggers_follow_up_request_with_deltas() {
},
];
let (server, _completions) =
start_streaming_sse_server(vec![first_chunks, second_chunks]).await;
let (server, completions) = start_streaming_sse_server(vec![first_chunks, second_chunks]).await;
let mut completions = completions.into_iter();
let codex = test_codex()
.with_model("gpt-5.1")
@@ -112,6 +119,7 @@ async fn injected_user_input_triggers_follow_up_request_with_deltas() {
matches!(event, EventMsg::AgentMessageContentDelta(_))
})
.await;
eprintln!("pending input probe observed first assistant delta");
codex
.submit(Op::UserInput {
@@ -123,24 +131,74 @@ async fn injected_user_input_triggers_follow_up_request_with_deltas() {
})
.await
.unwrap();
eprintln!("pending input probe injected second user input");
timeout(Duration::from_secs(5), async {
while !codex.has_pending_input().await {
tokio::task::yield_now().await;
}
})
.await
.expect("timed out waiting for second user input to become pending");
eprintln!("pending input probe observed second user input queued as pending");
let _ = gate_completed_tx.send(());
eprintln!("pending input probe released first response completion gate");
wait_for_event(&codex, |event| matches!(event, EventMsg::TurnComplete(_))).await;
let first_completion = completions
.next()
.expect("missing first response stream completion handle");
timeout(Duration::from_secs(5), first_completion)
.await
.expect("timed out waiting for first response stream completion")
.expect("first response stream closed before completion");
eprintln!("pending input probe observed first response stream completion");
let requests = server.requests().await;
let second_completion = completions
.next()
.expect("missing follow-up response stream completion handle");
timeout(Duration::from_secs(5), second_completion)
.await
.expect("timed out waiting for follow-up response stream completion")
.expect("follow-up response stream closed before completion");
eprintln!("pending input probe observed follow-up response stream completion");
let requests = timeout(Duration::from_secs(5), async {
loop {
let requests = server.requests().await;
if requests.len() >= 2 {
break requests;
}
tokio::task::yield_now().await;
}
})
.await
.expect("timed out waiting for follow-up request capture");
eprintln!("pending input probe captured {} requests", requests.len());
assert_eq!(requests.len(), 2);
let first_body: Value = serde_json::from_slice(&requests[0]).expect("parse first request");
let second_body: Value = serde_json::from_slice(&requests[1]).expect("parse second request");
eprintln!("pending input probe request[0] body: {first_body}");
eprintln!("pending input probe request[1] body: {second_body}");
let first_texts = message_input_texts(&first_body, "user");
assert!(first_texts.iter().any(|text| text == "first prompt"));
assert!(!first_texts.iter().any(|text| text == "second prompt"));
let request_bodies = [&first_body, &second_body];
let initial_request_matches = request_bodies
.iter()
.filter(|body| has_user_text(body, "first prompt") && !has_user_text(body, "second prompt"))
.count();
let follow_up_request_matches = request_bodies
.iter()
.filter(|body| has_user_text(body, "first prompt") && has_user_text(body, "second prompt"))
.count();
let second_texts = message_input_texts(&second_body, "user");
assert!(second_texts.iter().any(|text| text == "first prompt"));
assert!(second_texts.iter().any(|text| text == "second prompt"));
assert_eq!(
initial_request_matches, 1,
"expected exactly one initial request with only the first prompt, bodies: {request_bodies:?}"
);
assert_eq!(
follow_up_request_matches, 1,
"expected exactly one follow-up request with both prompts, bodies: {request_bodies:?}"
);
server.shutdown().await;
}

View File

@@ -451,7 +451,6 @@ async fn unified_exec_resolves_relative_workdir() -> Result<()> {
}
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
#[ignore = "flaky"]
async fn unified_exec_respects_workdir_override() -> Result<()> {
skip_if_no_network!(Ok(()));
skip_if_sandbox!(Ok(()));
@@ -495,7 +494,7 @@ async fn unified_exec_respects_workdir_override() -> Result<()> {
ev_completed("resp-2"),
]),
];
let request_log = mount_sse_sequence(&server, responses).await;
mount_sse_sequence(&server, responses).await;
let session_model = session_configured.model.clone();
@@ -531,9 +530,6 @@ async fn unified_exec_respects_workdir_override() -> Result<()> {
wait_for_event(&codex, |event| matches!(event, EventMsg::TurnComplete(_))).await;
let requests = request_log.requests();
assert!(!requests.is_empty(), "expected at least one POST request");
Ok(())
}

View File

@@ -4,6 +4,7 @@ use codex_core::CodexThread;
use codex_core::NewThread;
use codex_core::ThreadManager;
use codex_core::config::Config;
use codex_core::error::CodexErr;
use codex_protocol::protocol::Event;
use codex_protocol::protocol::EventMsg;
use codex_protocol::protocol::Op;
@@ -24,6 +25,17 @@ async fn initialize_app_server_client_name(thread: &CodexThread) {
}
}
fn emit_startup_error(app_event_tx: &AppEventSender, err: &CodexErr) {
let message = format!("Failed to initialize codex: {err}");
tracing::error!("{message}");
app_event_tx.send(AppEvent::CodexEvent(Event {
id: String::new(),
msg: EventMsg::Error(err.to_error_event(None)),
}));
app_event_tx.send(AppEvent::FatalExitRequest(message));
tracing::error!("failed to initialize codex: {err}");
}
/// Spawn the agent bootstrapper and op forwarding loop, returning the
/// `UnboundedSender<Op>` used by the UI to submit operations.
pub(crate) fn spawn_agent(
@@ -42,14 +54,7 @@ pub(crate) fn spawn_agent(
} = match server.start_thread(config).await {
Ok(v) => v,
Err(err) => {
let message = format!("Failed to initialize codex: {err}");
tracing::error!("{message}");
app_event_tx_clone.send(AppEvent::CodexEvent(Event {
id: "".to_string(),
msg: EventMsg::Error(err.to_error_event(None)),
}));
app_event_tx_clone.send(AppEvent::FatalExitRequest(message));
tracing::error!("failed to initialize codex: {err}");
emit_startup_error(&app_event_tx_clone, &err);
return;
}
};
@@ -147,3 +152,47 @@ pub(crate) fn spawn_op_forwarder(thread: std::sync::Arc<CodexThread>) -> Unbound
codex_op_tx
}
#[cfg(test)]
mod tests {
use super::*;
use tokio::sync::mpsc::unbounded_channel;
#[test]
fn startup_errors_are_forwarded_to_the_ui() {
let (app_event_tx, mut app_event_rx) = unbounded_channel();
let err = CodexErr::Io(std::io::Error::other("failed to read rules files"));
emit_startup_error(&AppEventSender::new(app_event_tx), &err);
let error_event = app_event_rx
.try_recv()
.expect("error event should be forwarded");
let fatal_exit = app_event_rx
.try_recv()
.expect("fatal exit should be forwarded");
let AppEvent::CodexEvent(event) = error_event else {
panic!("expected CodexEvent, got {error_event:?}");
};
let EventMsg::Error(err) = event.msg else {
panic!("expected Error event, got {:?}", event.msg);
};
assert!(
err.message.contains("failed to read rules files"),
"expected rules read error in forwarded event, got: {}",
err.message
);
let AppEvent::FatalExitRequest(message) = fatal_exit else {
panic!("expected FatalExitRequest, got {fatal_exit:?}");
};
assert!(
message.contains("Failed to initialize codex:"),
"expected fatal startup prefix, got: {message}"
);
assert!(
message.contains("failed to read rules files"),
"expected rules read error in fatal exit, got: {message}"
);
}
}

View File

@@ -1,6 +1,5 @@
// Aggregates all former standalone integration tests as modules.
mod model_availability_nux;
mod no_panic_on_startup;
mod status_indicator;
mod vt100_history;
mod vt100_live_commit;

View File

@@ -1,127 +0,0 @@
use std::collections::HashMap;
use std::path::Path;
use std::time::Duration;
use tokio::select;
use tokio::time::timeout;
/// Regression test for https://github.com/openai/codex/issues/8803.
#[tokio::test]
#[ignore = "TODO(mbolin): flaky"]
async fn malformed_rules_should_not_panic() -> anyhow::Result<()> {
// run_codex_cli() does not work on Windows due to PTY limitations.
if cfg!(windows) {
return Ok(());
}
let tmp = tempfile::tempdir()?;
let codex_home = tmp.path();
std::fs::write(
codex_home.join("rules"),
"rules should be a directory not a file",
)?;
// TODO(mbolin): Figure out why using a temp dir as the cwd causes this test
// to hang.
let cwd = std::env::current_dir()?;
let config_contents = format!(
r#"
# Pick a local provider so the CLI doesn't prompt for OpenAI auth in this test.
model_provider = "ollama"
[projects]
"{cwd}" = {{ trust_level = "trusted" }}
"#,
cwd = cwd.display()
);
std::fs::write(codex_home.join("config.toml"), config_contents)?;
let CodexCliOutput { exit_code, output } = run_codex_cli(codex_home, cwd).await?;
assert_ne!(0, exit_code, "Codex CLI should exit nonzero.");
assert!(
output.contains("ERROR: Failed to initialize codex:"),
"expected startup error in output, got: {output}"
);
assert!(
output.contains("failed to read rules files"),
"expected rules read error in output, got: {output}"
);
Ok(())
}
struct CodexCliOutput {
exit_code: i32,
output: String,
}
async fn run_codex_cli(
codex_home: impl AsRef<Path>,
cwd: impl AsRef<Path>,
) -> anyhow::Result<CodexCliOutput> {
let codex_cli = codex_utils_cargo_bin::cargo_bin("codex")?;
let mut env = HashMap::new();
env.insert(
"CODEX_HOME".to_string(),
codex_home.as_ref().display().to_string(),
);
let args = vec!["-c".to_string(), "analytics.enabled=false".to_string()];
let spawned = codex_utils_pty::spawn_pty_process(
codex_cli.to_string_lossy().as_ref(),
&args,
cwd.as_ref(),
&env,
&None,
codex_utils_pty::TerminalSize::default(),
)
.await?;
let mut output = Vec::new();
let codex_utils_pty::SpawnedProcess {
session,
stdout_rx,
stderr_rx,
exit_rx,
} = spawned;
let mut output_rx = codex_utils_pty::combine_output_receivers(stdout_rx, stderr_rx);
let mut exit_rx = exit_rx;
let writer_tx = session.writer_sender();
let exit_code_result = timeout(Duration::from_secs(10), async {
// Read PTY output until the process exits while replying to cursor
// position queries so the TUI can initialize without a real terminal.
loop {
select! {
result = output_rx.recv() => match result {
Ok(chunk) => {
// The TUI asks for the cursor position via ESC[6n.
// Respond with a valid position to unblock startup.
if chunk.windows(4).any(|window| window == b"\x1b[6n") {
let _ = writer_tx.send(b"\x1b[1;1R".to_vec()).await;
}
output.extend_from_slice(&chunk);
}
Err(tokio::sync::broadcast::error::RecvError::Closed) => break exit_rx.await,
Err(tokio::sync::broadcast::error::RecvError::Lagged(_)) => {}
},
result = &mut exit_rx => break result,
}
}
})
.await;
let exit_code = match exit_code_result {
Ok(Ok(code)) => code,
Ok(Err(err)) => return Err(err.into()),
Err(_) => {
session.terminate();
anyhow::bail!("timed out waiting for codex CLI to exit");
}
};
// Drain any output that raced with the exit notification.
while let Ok(chunk) = output_rx.try_recv() {
output.extend_from_slice(&chunk);
}
let output = String::from_utf8_lossy(&output);
Ok(CodexCliOutput {
exit_code,
output: output.to_string(),
})
}