test: stabilize bazel arm flakes

This commit is contained in:
Yaroslav Volovich
2026-02-25 19:15:10 +00:00
parent e0ab8c645c
commit d52f1a5d37
3 changed files with 100 additions and 7 deletions

View File

@@ -372,8 +372,36 @@ async fn list_apps_emits_updates_and_returns_after_both_lists_load() -> Result<(
is_enabled: true,
}];
let first_update = read_app_list_updated_notification(&mut mcp).await?;
assert_eq!(first_update.data, expected_accessible);
let expected_directory = vec![
AppInfo {
id: "alpha".to_string(),
name: "Alpha".to_string(),
description: Some("Alpha connector".to_string()),
logo_url: Some("https://example.com/alpha.png".to_string()),
logo_url_dark: None,
distribution_channel: None,
branding: alpha_branding.clone(),
app_metadata: alpha_app_metadata.clone(),
labels: alpha_labels.clone(),
install_url: Some("https://chatgpt.com/apps/alpha/alpha".to_string()),
is_accessible: false,
is_enabled: true,
},
AppInfo {
id: "beta".to_string(),
name: "beta".to_string(),
description: None,
logo_url: None,
logo_url_dark: None,
distribution_channel: None,
branding: None,
app_metadata: None,
labels: None,
install_url: Some("https://chatgpt.com/apps/beta/beta".to_string()),
is_accessible: false,
is_enabled: true,
},
];
let expected_merged = vec![
AppInfo {
@@ -406,8 +434,26 @@ async fn list_apps_emits_updates_and_returns_after_both_lists_load() -> Result<(
},
];
let second_update = read_app_list_updated_notification(&mut mcp).await?;
assert_eq!(second_update.data, expected_merged);
let mut saw_merged_update = false;
for update_idx in 0..2 {
let update = read_app_list_updated_notification(&mut mcp).await?;
if update.data == expected_accessible || update.data == expected_directory {
continue;
}
if update.data == expected_merged {
saw_merged_update = true;
continue;
}
panic!(
"unexpected app/list/updated payload at update {update_idx}: {:?}",
update.data
);
}
assert!(
saw_merged_update,
"expected a merged app/list/updated payload before the response"
);
let response: JSONRPCResponse = timeout(
DEFAULT_TIMEOUT,

View File

@@ -143,7 +143,7 @@ async fn interrupt_tool_records_history_entries() {
let output = response_mock
.function_call_output_text(call_id)
.expect("missing function_call_output text");
let re = Regex::new(r"^Wall time: ([0-9]+(?:\.[0-9])?) seconds\naborted by user$")
let re = Regex::new(r"Wall time: ([0-9]+(?:\.[0-9]+)?) seconds\r?\naborted by user")
.expect("compile regex");
let captures = re.captures(&output);
assert_matches!(

View File

@@ -122,6 +122,31 @@ fn is_bwrap_unavailable_output(output: &codex_core::exec::ExecToolCallOutput) ->
|| output.stderr.text.contains("Invalid argument")))
}
fn is_landlock_restrict_output(output: &codex_core::exec::ExecToolCallOutput) -> bool {
output.stderr.text.contains("Sandbox(LandlockRestrict)")
}
async fn should_skip_workspace_write_tests() -> bool {
match run_cmd_result_with_writable_roots(
&["bash", "-lc", "true"],
&[],
LONG_TIMEOUT_MS,
false,
false,
)
.await
{
Ok(_) => false,
Err(CodexErr::Sandbox(SandboxErr::Denied { output, .. })) => {
is_landlock_restrict_output(&output)
}
// A hung probe does not give us actionable signal; skip rather than
// fail a suite that is already running in a degraded sandbox.
Err(CodexErr::Sandbox(SandboxErr::Timeout { .. })) => true,
Err(err) => panic!("workspace-write sandbox probe failed unexpectedly: {err:?}"),
}
}
async fn should_skip_bwrap_tests() -> bool {
match run_cmd_result_with_writable_roots(
&["bash", "-lc", "true"],
@@ -159,6 +184,10 @@ fn expect_denied(
#[tokio::test]
async fn test_root_read() {
if should_skip_workspace_write_tests().await {
eprintln!("skipping workspace-write test: landlock restrictions are unavailable");
return;
}
run_cmd(&["ls", "-l", "/bin"], &[], SHORT_TIMEOUT_MS).await;
}
@@ -265,6 +294,11 @@ async fn bwrap_preserves_writable_dev_shm_bind_mount() {
#[tokio::test]
async fn test_writable_root() {
if should_skip_workspace_write_tests().await {
eprintln!("skipping workspace-write test: landlock restrictions are unavailable");
return;
}
let tmpdir = tempfile::tempdir().unwrap();
let file_path = tmpdir.path().join("test");
run_cmd(
@@ -283,6 +317,11 @@ async fn test_writable_root() {
#[tokio::test]
async fn test_no_new_privs_is_enabled() {
if should_skip_workspace_write_tests().await {
eprintln!("skipping workspace-write test: landlock restrictions are unavailable");
return;
}
let output = run_cmd_output(
&["bash", "-lc", "grep '^NoNewPrivs:' /proc/self/status"],
&[],
@@ -301,9 +340,17 @@ async fn test_no_new_privs_is_enabled() {
}
#[tokio::test]
#[should_panic(expected = "Sandbox(Timeout")]
async fn test_timeout() {
run_cmd(&["sleep", "2"], &[], 50).await;
if should_skip_workspace_write_tests().await {
eprintln!("skipping workspace-write test: landlock restrictions are unavailable");
return;
}
let result = run_cmd_result_with_writable_roots(&["sleep", "2"], &[], 50, false, false).await;
assert!(
matches!(result, Err(CodexErr::Sandbox(SandboxErr::Timeout { .. }))),
"expected sandbox timeout, got {result:?}"
);
}
/// Helper that runs `cmd` under the Linux sandbox and asserts that the command