codex: restart PR CI (#13871)

codex: reconnect PR ancestry (#13871)
codex: align Bazel snapshot source remap
2026-03-16 10:56:29 +03:00 · 2026-03-07 23:18:45 -08:00 · 2026-03-07 11:30:15 -08:00 · 2026-03-07 11:28:06 -08:00 · 2026-03-07 11:24:09 -08:00 · 2026-03-07 11:20:59 -08:00
10 changed files with 133 additions and 97 deletions
--- a/codex-rs/app-server/tests/suite/v2/thread_unsubscribe.rs
+++ b/codex-rs/app-server/tests/suite/v2/thread_unsubscribe.rs
@@ -34,6 +34,51 @@ use tokio::time::timeout;

 const DEFAULT_READ_TIMEOUT: std::time::Duration = std::time::Duration::from_secs(10);

+async fn wait_for_responses_request_count_to_stabilize(
+    server: &wiremock::MockServer,
+    expected_count: usize,
+    settle_duration: std::time::Duration,
+) -> Result<()> {
+    timeout(DEFAULT_READ_TIMEOUT, async {
+        let mut stable_since: Option<tokio::time::Instant> = None;
+        loop {
+            let requests = server
+                .received_requests()
+                .await
+                .context("failed to fetch received requests")?;
+            let responses_request_count = requests
+                .iter()
+                .filter(|request| {
+                    request.method == "POST" && request.url.path().ends_with("/responses")
+                })
+                .count();
+
+            if responses_request_count > expected_count {
+                anyhow::bail!(
+                    "expected exactly {expected_count} /responses requests, got {responses_request_count}"
+                );
+            }
+
+            if responses_request_count == expected_count {
+                match stable_since {
+                    Some(stable_since) if stable_since.elapsed() >= settle_duration => {
+                        return Ok::<(), anyhow::Error>(());
+                    }
+                    None => stable_since = Some(tokio::time::Instant::now()),
+                    Some(_) => {}
+                }
+            } else {
+                stable_since = None;
+            }
+
+            tokio::time::sleep(std::time::Duration::from_millis(10)).await;
+        }
+    })
+    .await??;
+
+    Ok(())
+}
+
 #[tokio::test]
 async fn thread_unsubscribe_unloads_thread_and_emits_thread_closed_notification() -> Result<()> {
    let server = create_mock_responses_server_repeating_assistant("Done").await;
@@ -168,6 +213,13 @@ async fn thread_unsubscribe_during_turn_interrupts_turn_and_emits_thread_closed(
    };
    assert_eq!(payload.thread_id, thread_id);

+    wait_for_responses_request_count_to_stabilize(
+        &server,
+        1,
+        std::time::Duration::from_millis(200),
+    )
+    .await?;
+
    Ok(())
 }

--- a/codex-rs/core/BUILD.bazel
+++ b/codex-rs/core/BUILD.bazel
@@ -34,7 +34,7 @@ codex_rust_crate(
        "models.json",
        "prompt.md",
    ],
-    test_data_extra = [
+    test_data_extra = glob(["src/**/snapshots/**"]) + [
        "config.schema.json",
        # This is a bit of a hack, but empirically, some of our integration tests
        # are relying on the presence of this file as a repo root marker. When
--- a/codex-rs/core/src/codex_tests_guardian.rs
+++ b/codex-rs/core/src/codex_tests_guardian.rs
@@ -2,24 +2,21 @@ use super::*;
 use crate::config_loader::ConfigLayerEntry;
 use crate::config_loader::ConfigRequirements;
 use crate::config_loader::ConfigRequirementsToml;
-use crate::exec::ExecParams;
 use crate::exec_policy::ExecPolicyManager;
 use crate::features::Feature;
 use crate::guardian::GUARDIAN_SUBAGENT_NAME;
 use crate::protocol::AskForApproval;
 use crate::sandboxing::SandboxPermissions;
+use crate::tools::handlers::normalize_and_validate_additional_permissions;
 use crate::turn_diff_tracker::TurnDiffTracker;
 use codex_app_server_protocol::ConfigLayerSource;
 use codex_execpolicy::Decision;
 use codex_execpolicy::Evaluation;
 use codex_execpolicy::RuleMatch;
-use codex_protocol::models::FunctionCallOutputBody;
 use codex_protocol::models::NetworkPermissions;
 use codex_protocol::models::PermissionProfile;
 use codex_utils_absolute_path::AbsolutePathBuf;
 use pretty_assertions::assert_eq;
-use serde::Deserialize;
-use std::collections::HashMap;
 use std::fs;
 use std::sync::Arc;
 use tempfile::tempdir;
@@ -39,89 +36,23 @@ async fn guardian_allows_shell_additional_permissions_requests_past_policy_valid
        .features
        .enable(Feature::RequestPermissions)
        .expect("test setup should allow enabling request permissions");
-    turn_context_raw
-        .sandbox_policy
-        .set(SandboxPolicy::DangerFullAccess)
-        .expect("test setup should allow updating sandbox policy");
-    let session = Arc::new(session);
-    let turn_context = Arc::new(turn_context_raw);
-
-    let params = ExecParams {
-        command: if cfg!(windows) {
-            vec![
-                "cmd.exe".to_string(),
-                "/C".to_string(),
-                "echo hi".to_string(),
-            ]
-        } else {
-            vec![
-                "/bin/sh".to_string(),
-                "-c".to_string(),
-                "echo hi".to_string(),
-            ]
-        },
-        cwd: turn_context.cwd.clone(),
-        expiration: 1000.into(),
-        env: HashMap::new(),
-        network: None,
-        sandbox_permissions: SandboxPermissions::WithAdditionalPermissions,
-        windows_sandbox_level: turn_context.windows_sandbox_level,
-        justification: Some("test".to_string()),
-        arg0: None,
+    let additional_permissions = PermissionProfile {
+        network: Some(NetworkPermissions {
+            enabled: Some(true),
+        }),
+        file_system: None,
+        macos: None,
    };
+    let normalized = normalize_and_validate_additional_permissions(
+        session.features().enabled(Feature::RequestPermissions),
+        turn_context_raw.approval_policy.value(),
+        SandboxPermissions::WithAdditionalPermissions,
+        Some(additional_permissions.clone()),
+        &turn_context_raw.cwd,
+    )
+    .expect("shell additional permissions should pass policy validation");

-    let handler = ShellHandler;
-    let resp = handler
-        .handle(ToolInvocation {
-            session: Arc::clone(&session),
-            turn: Arc::clone(&turn_context),
-            tracker: Arc::new(tokio::sync::Mutex::new(TurnDiffTracker::new())),
-            call_id: "test-call".to_string(),
-            tool_name: "shell".to_string(),
-            payload: ToolPayload::Function {
-                arguments: serde_json::json!({
-                    "command": params.command.clone(),
-                    "workdir": Some(turn_context.cwd.to_string_lossy().to_string()),
-                    "timeout_ms": params.expiration.timeout_ms(),
-                    "sandbox_permissions": params.sandbox_permissions,
-                    "additional_permissions": PermissionProfile {
-                        network: Some(NetworkPermissions {
-                            enabled: Some(true),
-                        }),
-                        file_system: None,
-                        macos: None,
-                    },
-                    "justification": params.justification.clone(),
-                })
-                .to_string(),
-            },
-        })
-        .await;
-
-    let output = match resp.expect("expected Ok result") {
-        ToolOutput::Function {
-            body: FunctionCallOutputBody::Text(content),
-            ..
-        } => content,
-        _ => panic!("unexpected tool output"),
-    };
-
-    #[derive(Deserialize, PartialEq, Eq, Debug)]
-    struct ResponseExecMetadata {
-        exit_code: i32,
-    }
-
-    #[derive(Deserialize)]
-    struct ResponseExecOutput {
-        output: String,
-        metadata: ResponseExecMetadata,
-    }
-
-    let exec_output: ResponseExecOutput =
-        serde_json::from_str(&output).expect("valid exec output json");
-
-    assert_eq!(exec_output.metadata, ResponseExecMetadata { exit_code: 0 });
-    assert!(exec_output.output.contains("hi"));
+    assert_eq!(normalized, Some(additional_permissions));
 }

 #[tokio::test]
--- a/codex-rs/core/src/guardian.rs
+++ b/codex-rs/core/src/guardian.rs
@@ -664,12 +664,14 @@ fn truncate_guardian_action_value(value: Value) -> Value {
                .map(truncate_guardian_action_value)
                .collect::<Vec<_>>(),
        ),
-        Value::Object(values) => Value::Object(
-            values
+        Value::Object(values) => {
+            let mut entries = values
                .into_iter()
                .map(|(key, value)| (key, truncate_guardian_action_value(value)))
-                .collect(),
-        ),
+                .collect::<Vec<_>>();
+            entries.sort_by(|(left, _), (right, _)| left.cmp(right));
+            Value::Object(entries.into_iter().collect())
+        }
        other => other,
    }
 }
--- a/codex-rs/core/src/guardian_tests.rs
+++ b/codex-rs/core/src/guardian_tests.rs
@@ -169,8 +169,28 @@ fn format_guardian_action_pretty_truncates_large_string_fields() {
        .as_str()
        .expect("test patch should serialize as a string");

+    let change_count_index = rendered
+        .find("\"change_count\"")
+        .expect("rendered json should contain change_count");
+    let cwd_index = rendered
+        .find("\"cwd\"")
+        .expect("rendered json should contain cwd");
+    let files_index = rendered
+        .find("\"files\"")
+        .expect("rendered json should contain files");
+    let patch_index = rendered
+        .find("\"patch\"")
+        .expect("rendered json should contain patch");
+    let tool_index = rendered
+        .find("\"tool\"")
+        .expect("rendered json should contain tool");
+
    assert!(rendered.contains("\"tool\": \"apply_patch\""));
    assert!(rendered.len() < original_patch.len());
+    assert!(change_count_index < cwd_index);
+    assert!(cwd_index < files_index);
+    assert!(files_index < patch_index);
+    assert!(patch_index < tool_index);
 }

 #[test]
--- a/codex-rs/core/src/snapshots/codex_coreguardiantests__guardian_review_request_layout.snap
+++ b/codex-rs/core/src/snapshots/codex_coreguardiantests__guardian_review_request_layout.snap
@@ -1,5 +1,5 @@
 ---
-source: core/src/guardian.rs
+source: core/src/guardian_tests.rs
 expression: "context_snapshot::format_labeled_requests_snapshot(\"Guardian review request layout\",\n&[(\"Guardian Review Request\", &request)], &ContextSnapshotOptions::default(),)"
 ---
 Scenario: Guardian review request layout
--- a/codex-rs/core/src/tools/handlers/mod.rs
+++ b/codex-rs/core/src/tools/handlers/mod.rs
@@ -90,7 +90,7 @@ fn resolve_workdir_base_path(

 /// Validates feature/policy constraints for `with_additional_permissions` and
 /// normalizes any path-based permissions. Errors if the request is invalid.
-pub(super) fn normalize_and_validate_additional_permissions(
+pub(crate) fn normalize_and_validate_additional_permissions(
    request_permission_enabled: bool,
    approval_policy: AskForApproval,
    sandbox_permissions: SandboxPermissions,
--- a/codex-rs/tui/src/chatwidget/snapshots/codex_tuichatwidgettests__experimental_popup_includes_guardian_approval@linux.snap
+++ b/codex-rs/tui/src/chatwidget/snapshots/codex_tuichatwidgettests__experimental_popup_includes_guardian_approval@linux.snap
@@ -0,0 +1,19 @@
+---
+source: tui/src/chatwidget/tests.rs
+expression: popup
+---
+  Experimental features
+  Toggle experimental features. Changes are saved to config.toml.
+
+› [ ] JavaScript REPL              Enable a persistent Node-backed JavaScript REPL for interactive website debugging
+                                   and other inline JavaScript execution capabilities. Requires Node >= v22.22.0
+                                   installed.
+  [ ] Bubblewrap sandbox           Try the new linux sandbox based on bubblewrap.
+  [ ] Multi-agents                 Ask Codex to spawn multiple agents to parallelize the work and win in efficiency.
+  [ ] Apps                         Use a connected ChatGPT App using "$". Install Apps via /apps command. Restart
+                                   Codex after enabling.
+  [ ] Guardian approvals           Let a guardian subagent review `on-request` approval prompts instead of showing
+                                   them to you, including sandbox escapes and blocked network access.
+  [ ] Prevent sleep while running  Keep your computer awake while Codex is running a thread.
+
+  Press space to select or enter to save for next conversation
--- a/codex-rs/tui/src/chatwidget/tests.rs
+++ b/codex-rs/tui/src/chatwidget/tests.rs
@@ -6949,6 +6949,11 @@ async fn experimental_popup_includes_guardian_approval() {
    chat.open_experimental_popup();

    let popup = render_bottom_popup(&chat, 120);
+    #[cfg(target_os = "linux")]
+    insta::with_settings!({ snapshot_suffix => "linux" }, {
+        assert_snapshot!("experimental_popup_includes_guardian_approval", popup);
+    });
+    #[cfg(not(target_os = "linux"))]
    assert_snapshot!("experimental_popup_includes_guardian_approval", popup);
 }

--- a/defs.bzl
+++ b/defs.bzl
@@ -80,10 +80,15 @@ def codex_rust_crate(
            `CARGO_BIN_EXE_*` environment variables. These are only needed for binaries from a different crate.
    """
    test_env = {
-        "INSTA_WORKSPACE_ROOT": ".",
+        "INSTA_WORKSPACE_ROOT": "codex-rs",
        "INSTA_SNAPSHOT_PATH": "src",
    }

+    cargo_like_package = native.package_name()
+    if cargo_like_package.startswith("codex-rs/"):
+        cargo_like_package = cargo_like_package[len("codex-rs/"):]
+    snapshot_path_remap = "--remap-path-prefix=%s=%s" % (native.package_name(), cargo_like_package)
+
    rustc_env = {
        "BAZEL_PACKAGE": native.package_name(),
    } | rustc_env
@@ -127,7 +132,9 @@ def codex_rust_crate(
            crate = name,
            env = test_env,
            deps = all_crate_deps(normal = True, normal_dev = True) + maybe_deps + deps_extra,
-            rustc_flags = rustc_flags_extra,
+            # Keep `file!()` paths Cargo-like (`core/src/...`) instead of
+            # Bazel package-prefixed (`codex-rs/core/src/...`) for snapshot parity.
+            rustc_flags = rustc_flags_extra + [snapshot_path_remap],
            rustc_env = rustc_env,
            data = test_data_extra,
            tags = test_tags,
@@ -174,11 +181,11 @@ def codex_rust_crate(
            compile_data = native.glob(["tests/**"], allow_empty = True) + integration_compile_data_extra,
            deps = all_crate_deps(normal = True, normal_dev = True) + maybe_deps + deps_extra,
            # Keep `file!()` paths Cargo-like (`core/tests/...`) instead of
-            # Bazel workspace-prefixed (`codex-rs/core/tests/...`) for snapshot parity.
-            rustc_flags = rustc_flags_extra + ["--remap-path-prefix=codex-rs="],
+            # Bazel package-prefixed (`codex-rs/core/tests/...`) for snapshot parity.
+            rustc_flags = rustc_flags_extra + [snapshot_path_remap],
            rustc_env = rustc_env,
            # Important: do not merge `test_env` here. Its unit-test-only
-            # `INSTA_WORKSPACE_ROOT="."` can point integration tests at the
+            # `INSTA_WORKSPACE_ROOT="codex-rs"` can point integration tests at the
            # runfiles cwd and cause false `.snap.new` churn on Linux.
            env = cargo_env,
            tags = test_tags,
Author	SHA1	Message	Date
Ahmed Ibrahim	38e772e463	codex: restart PR CI (#13871)	2026-03-07 23:18:45 -08:00
Ahmed Ibrahim	30e8a01356	codex: reconnect PR ancestry (#13871)	2026-03-07 11:30:15 -08:00
Ahmed Ibrahim	b03266984e	codex: align Bazel snapshot source remap	2026-03-07 11:28:06 -08:00
Ahmed Ibrahim	d5ecd1b750	codex: restart PR CI (#13871)	2026-03-07 11:24:09 -08:00
Ahmed Ibrahim	934a0da85a	codex: fix guardian snapshot drift in PR CI	2026-03-07 11:20:59 -08:00
Ahmed Ibrahim	5f1a510823	codex: stabilize thread unsubscribe app-server test	2026-03-07 11:07:02 -08:00
Ahmed Ibrahim	7f10eb22a1	codex: refresh guardian approval snapshot (#13593)	2026-03-07 10:58:05 -08:00
Ahmed Ibrahim	6cadc1e883	codex: sort guardian action json (#13593)	2026-03-07 10:50:19 -08:00
Ahmed Ibrahim	5fd9f40810	codex: fix Bazel insta snapshot paths (#13593)	2026-03-07 10:43:19 -08:00
Ahmed Ibrahim	8a9c9277ba	codex: align guardian snapshot metadata	2026-03-07 10:26:30 -08:00
Ahmed Ibrahim	4f9683bd25	codex: include core snapshots in bazel tests	2026-03-07 10:25:38 -08:00
Ahmed Ibrahim	545ffd8a6c	codex: fix non-linux guardian popup snapshot	2026-03-07 10:22:27 -08:00
Ahmed Ibrahim	bd0641a108	codex: stabilize guardian snapshots on linux	2026-03-07 10:20:02 -08:00
Ahmed Ibrahim	47b2b37721	codex: restore guardian popup snapshot	2026-03-07 10:19:19 -08:00
Ahmed Ibrahim	ed56314b3f	codex: fix guardian popup snapshot on PR #13871	2026-03-07 10:13:51 -08:00
Ahmed Ibrahim	7ad3becc2f	codex: stabilize guardian permissions validation test on PR #13871	2026-03-07 09:55:21 -08:00
Ahmed Ibrahim	f661b3a617	codex: fix guardian CI drift on PR #13871	2026-03-07 09:40:40 -08:00
Ahmed Ibrahim	a9c2c45683	Merge remote-tracking branch 'origin/main' into dev/flaky-thread-unsubscribe-test	2026-03-07 09:11:01 -08:00
Ahmed Ibrahim	c9bbf3121b	codex: stabilize thread unsubscribe app-server test	2026-03-07 08:44:30 -08:00