Compare commits

...

3 Commits

Author SHA1 Message Date
Thibault Sottiaux
8347d4292d add response proxy subagent header test 2026-04-05 20:52:00 -07:00
Eric Traut
b5edeb98a0 Fix flaky permissions escalation test on Windows (#16825)
Problem: `rejects_escalated_permissions_when_policy_not_on_request`
retried a real shell command after asserting the escalation rejection,
so Windows CI could fail on command startup timing instead of approval
behavior.

Solution: Keep the rejection assertion, verify no turn permissions were
granted, and assert through exec-policy evaluation that the same command
would be allowed without escalation instead of timing a subprocess.
2026-04-05 10:51:01 -07:00
Eric Traut
152b676597 Fix flaky test relating to metadata remote URL (#16823)
This test was flaking on Windows.

Problem: The Windows CI test for turn metadata compared git remote URLs
byte-for-byte even though equivalent remotes can be formatted
differently across Git code paths.

Solution: Normalize the expected and actual origin URLs in the test by
trimming whitespace, removing a trailing slash, and stripping a trailing
.git suffix before comparing.
2026-04-05 10:50:29 -07:00
4 changed files with 276 additions and 69 deletions

View File

@@ -43,7 +43,6 @@ use crate::state::TaskKind;
use crate::tasks::SessionTask;
use crate::tasks::SessionTaskContext;
use crate::tools::ToolRouter;
use crate::tools::context::FunctionToolOutput;
use crate::tools::context::ToolInvocation;
use crate::tools::context::ToolPayload;
use crate::tools::handlers::ShellHandler;
@@ -120,12 +119,6 @@ use std::time::Duration as StdDuration;
#[path = "codex_tests_guardian.rs"]
mod guardian_tests;
use codex_protocol::models::function_call_output_content_items_to_text;
/// Flattens a tool call's output content items into a single plain-text string.
// NOTE(review): falls back to an empty string when
// function_call_output_content_items_to_text yields no text — assumes callers
// treat "no text" and "empty text" identically; confirm at assertion sites.
fn expect_text_tool_output(output: &FunctionToolOutput) -> String {
    function_call_output_content_items_to_text(&output.body).unwrap_or_default()
}
struct InstructionsTestCase {
slug: &'static str,
expects_apply_patch_instructions: bool,
@@ -5348,7 +5341,9 @@ async fn sample_rollout(
#[tokio::test]
async fn rejects_escalated_permissions_when_policy_not_on_request() {
use crate::exec::ExecParams;
use crate::exec_policy::ExecApprovalRequest;
use crate::sandboxing::SandboxPermissions;
use crate::tools::sandboxing::ExecApprovalRequirement;
use crate::turn_diff_tracker::TurnDiffTracker;
use codex_protocol::protocol::AskForApproval;
use codex_protocol::protocol::SandboxPolicy;
@@ -5394,23 +5389,6 @@ async fn rejects_escalated_permissions_when_policy_not_on_request() {
arg0: None,
};
let params2 = ExecParams {
sandbox_permissions: SandboxPermissions::UseDefault,
command: params.command.clone(),
cwd: params.cwd.clone(),
expiration: timeout_ms.into(),
capture_policy: ExecCapturePolicy::ShellTool,
env: HashMap::new(),
network: None,
windows_sandbox_level: turn_context.windows_sandbox_level,
windows_sandbox_private_desktop: turn_context
.config
.permissions
.windows_sandbox_private_desktop,
justification: params.justification.clone(),
arg0: None,
};
let turn_diff_tracker = Arc::new(tokio::sync::Mutex::new(TurnDiffTracker::new()));
let tool_name = "shell";
@@ -5448,9 +5426,11 @@ async fn rejects_escalated_permissions_when_policy_not_on_request() {
);
pretty_assertions::assert_eq!(output, expected);
pretty_assertions::assert_eq!(session.granted_turn_permissions().await, None);
// Now retry the same command WITHOUT escalated permissions; should succeed.
// Force DangerFullAccess to avoid platform sandbox dependencies in tests.
// The rejection should not poison the non-escalated path for the same
// command. Force DangerFullAccess so this check stays focused on approval
// policy rather than platform-specific sandbox behavior.
let turn_context_mut = Arc::get_mut(&mut turn_context).expect("unique turn context Arc");
turn_context_mut
.sandbox_policy
@@ -5461,45 +5441,22 @@ async fn rejects_escalated_permissions_when_policy_not_on_request() {
turn_context_mut.network_sandbox_policy =
NetworkSandboxPolicy::from(turn_context_mut.sandbox_policy.get());
let resp2 = handler
.handle(ToolInvocation {
session: Arc::clone(&session),
turn: Arc::clone(&turn_context),
tracker: Arc::clone(&turn_diff_tracker),
call_id: "test-call-2".to_string(),
tool_name: tool_name.to_string(),
tool_namespace: None,
payload: ToolPayload::Function {
arguments: serde_json::json!({
"command": params2.command.clone(),
"workdir": Some(turn_context.cwd.to_string_lossy().to_string()),
"timeout_ms": params2.expiration.timeout_ms(),
"sandbox_permissions": params2.sandbox_permissions,
"justification": params2.justification.clone(),
})
.to_string(),
},
let exec_approval_requirement = session
.services
.exec_policy
.create_exec_approval_requirement_for_command(ExecApprovalRequest {
command: &params.command,
approval_policy: turn_context.approval_policy.value(),
sandbox_policy: turn_context.sandbox_policy.get(),
file_system_sandbox_policy: &turn_context.file_system_sandbox_policy,
sandbox_permissions: SandboxPermissions::UseDefault,
prefix_rule: None,
})
.await;
let output = expect_text_tool_output(&resp2.expect("expected Ok result"));
#[derive(Deserialize, PartialEq, Eq, Debug)]
struct ResponseExecMetadata {
exit_code: i32,
}
#[derive(Deserialize)]
struct ResponseExecOutput {
output: String,
metadata: ResponseExecMetadata,
}
let exec_output: ResponseExecOutput =
serde_json::from_str(&output).expect("valid exec output json");
pretty_assertions::assert_eq!(exec_output.metadata, ResponseExecMetadata { exit_code: 0 });
assert!(exec_output.output.contains("hi"));
assert!(matches!(
exec_approval_requirement,
ExecApprovalRequirement::Skip { .. }
));
}
#[tokio::test]
async fn unified_exec_rejects_escalated_permissions_when_policy_not_on_request() {

View File

@@ -23,6 +23,14 @@ use pretty_assertions::assert_eq;
use tempfile::TempDir;
use wiremock::matchers::header;
/// Canonicalizes a git remote URL for comparison: trims surrounding
/// whitespace, drops trailing slashes, and removes a trailing `.git` suffix.
///
/// Equivalent remotes can be rendered differently by different Git code
/// paths; normalizing both sides keeps the comparison stable.
fn normalize_git_remote_url(url: &str) -> String {
    let mut canonical = url.trim().trim_end_matches('/');
    if let Some(stripped) = canonical.strip_suffix(".git") {
        canonical = stripped;
    }
    canonical.to_string()
}
#[tokio::test]
async fn responses_stream_includes_subagent_header_on_review() {
core_test_support::skip_if_no_network!();
@@ -540,13 +548,15 @@ async fn responses_stream_includes_turn_metadata_header_for_git_workspace_e2e()
.and_then(serde_json::Value::as_str),
Some(expected_head.as_str())
);
let actual_origin = workspace
.get("associated_remote_urls")
.and_then(serde_json::Value::as_object)
.and_then(|remotes| remotes.get("origin"))
.and_then(serde_json::Value::as_str)
.expect("origin remote should be present");
assert_eq!(
workspace
.get("associated_remote_urls")
.and_then(serde_json::Value::as_object)
.and_then(|remotes| remotes.get("origin"))
.and_then(serde_json::Value::as_str),
Some(expected_origin.as_str())
normalize_git_remote_url(actual_origin),
normalize_git_remote_url(&expected_origin)
);
assert_eq!(
workspace

View File

@@ -122,6 +122,7 @@ mod request_permissions;
#[cfg(not(target_os = "windows"))]
mod request_permissions_tool;
mod request_user_input;
mod responses_api_proxy_headers;
mod resume;
mod resume_warning;
mod review;

View File

@@ -0,0 +1,239 @@
//! Exercises a real `responses-api-proxy` process with request dumping enabled, then verifies that
//! parent and spawned subagent requests carry the expected window, parent-thread, and subagent
//! identity headers in the dumped Responses API requests.
use anyhow::Result;
use anyhow::anyhow;
use codex_features::Feature;
use core_test_support::responses::ev_assistant_message;
use core_test_support::responses::ev_completed;
use core_test_support::responses::ev_function_call;
use core_test_support::responses::ev_response_created;
use core_test_support::responses::mount_sse_once_match;
use core_test_support::responses::sse;
use core_test_support::responses::start_mock_server;
use core_test_support::skip_if_no_network;
use core_test_support::test_codex::test_codex;
use pretty_assertions::assert_eq;
use serde_json::Value;
use serde_json::json;
use std::io::Write;
use std::path::Path;
use std::process::Child;
use std::process::Command as StdCommand;
use std::process::Stdio;
use std::time::Duration;
use std::time::Instant;
use tempfile::TempDir;
/// Prompt driving the parent turn; also used to locate its request dump.
const PARENT_PROMPT: &str = "spawn a subagent and report when it is started";
/// Prompt forwarded to the spawned subagent; locates the child request dump.
const CHILD_PROMPT: &str = "child: say done";
/// Function-call id of the mocked `spawn_agent` call in the parent turn.
const SPAWN_CALL_ID: &str = "spawn-call-1";
/// How long to wait for the proxy process to write its server-info file.
const PROXY_START_TIMEOUT: Duration = Duration::from_secs(/*secs*/ 5);
/// Sleep between polls while waiting on the proxy or its request dumps.
const PROXY_POLL_INTERVAL: Duration = Duration::from_millis(/*millis*/ 20);
/// Handle to a spawned `codex responses-api-proxy` child process.
///
/// Killed and reaped on drop so tests do not leak the subprocess.
struct ResponsesApiProxy {
    child: Child,
    /// TCP port the proxy bound, read from its server-info JSON file.
    port: u16,
}
impl ResponsesApiProxy {
    /// Spawns `codex responses-api-proxy` pointed at `upstream_url` with
    /// request dumping enabled into `dump_dir`, and blocks until the proxy
    /// reports its bound port via the server-info file.
    ///
    /// # Errors
    /// Fails if the binary cannot be spawned, the child exits before writing
    /// server info, the server-info JSON is malformed, the port does not fit
    /// in a `u16`, or the startup deadline elapses.
    fn start(upstream_url: &str, dump_dir: &Path) -> Result<Self> {
        let server_info = dump_dir.join("server-info.json");
        let mut child = StdCommand::new(codex_utils_cargo_bin::cargo_bin("codex")?)
            .args(["responses-api-proxy", "--server-info"])
            .arg(&server_info)
            .args(["--upstream-url", upstream_url, "--dump-dir"])
            .arg(dump_dir)
            .stdin(Stdio::piped())
            .stdout(Stdio::null())
            .stderr(Stdio::null())
            .spawn()?;
        // The proxy consumes a line from stdin before serving — presumably a
        // credential/handshake line; any value suffices here. TODO confirm.
        child
            .stdin
            .take()
            .ok_or_else(|| anyhow!("responses-api-proxy stdin was not piped"))?
            .write_all(b"dummy\n")?;
        let deadline = Instant::now() + PROXY_START_TIMEOUT;
        loop {
            // Readiness is signaled by the child writing its port into the
            // server-info JSON file.
            if let Ok(info) = std::fs::read_to_string(&server_info) {
                let port = serde_json::from_str::<Value>(&info)?
                    .get("port")
                    .and_then(Value::as_u64)
                    .ok_or_else(|| anyhow!("proxy server info missing port"))?;
                return Ok(Self {
                    child,
                    port: u16::try_from(port)?,
                });
            }
            // Surface an early crash instead of spinning until the deadline.
            if let Some(status) = child.try_wait()? {
                return Err(anyhow!(
                    "responses-api-proxy exited before writing server info: {status}"
                ));
            }
            if Instant::now() >= deadline {
                return Err(anyhow!("timed out waiting for responses-api-proxy"));
            }
            std::thread::sleep(PROXY_POLL_INTERVAL);
        }
    }
    /// Base URL (including the `/v1` prefix) clients use to reach the proxy.
    fn base_url(&self) -> String {
        format!("http://127.0.0.1:{}/v1", self.port)
    }
}
impl Drop for ResponsesApiProxy {
    // Best-effort teardown: terminate the proxy and reap it so the test run
    // leaves no zombie child behind. Errors are ignored on purpose — the
    // child may already have exited.
    fn drop(&mut self) {
        self.child.kill().ok();
        self.child.wait().ok();
    }
}
/// End-to-end check that a real `responses-api-proxy` dumps the expected
/// identity headers for both the parent turn and a spawned subagent turn.
///
/// Flow: start the proxy in front of a mock Responses server, drive a parent
/// turn whose model response spawns a subagent, then inspect the proxy's
/// dumped requests for window-id, subagent, and parent-thread headers.
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
async fn responses_api_proxy_dumps_parent_and_subagent_identity_headers() -> Result<()> {
    skip_if_no_network!(Ok(()));
    let server = start_mock_server().await;
    let dump_dir = TempDir::new()?;
    let proxy =
        ResponsesApiProxy::start(&format!("{}/v1/responses", server.uri()), dump_dir.path())?;
    let spawn_args = serde_json::to_string(&json!({ "message": CHILD_PROMPT }))?;
    // Parent turn #1: the model replies with a `spawn_agent` function call.
    mount_sse_once_match(
        &server,
        |req: &wiremock::Request| request_body_contains(req, PARENT_PROMPT),
        sse(vec![
            ev_response_created("resp-parent-1"),
            ev_function_call(SPAWN_CALL_ID, "spawn_agent", &spawn_args),
            ev_completed("resp-parent-1"),
        ]),
    )
    .await;
    // Child turn: matched on the child prompt, excluding the parent follow-up
    // request that echoes the spawn call id.
    mount_sse_once_match(
        &server,
        |req: &wiremock::Request| {
            request_body_contains(req, CHILD_PROMPT) && !request_body_contains(req, SPAWN_CALL_ID)
        },
        sse(vec![
            ev_response_created("resp-child-1"),
            ev_assistant_message("msg-child-1", "child done"),
            ev_completed("resp-child-1"),
        ]),
    )
    .await;
    // Parent turn #2: the follow-up request carrying the spawn tool output.
    mount_sse_once_match(
        &server,
        |req: &wiremock::Request| request_body_contains(req, SPAWN_CALL_ID),
        sse(vec![
            ev_response_created("resp-parent-2"),
            ev_assistant_message("msg-parent-2", "parent done"),
            ev_completed("resp-parent-2"),
        ]),
    )
    .await;
    let proxy_base_url = proxy.base_url();
    // Route the client through the proxy; disable request compression so the
    // dumped request bodies remain plain text that the matchers can grep.
    let mut builder = test_codex().with_config(move |config| {
        config.model_provider.base_url = Some(proxy_base_url);
        config
            .features
            .disable(Feature::EnableRequestCompression)
            .expect("test config should allow feature update");
    });
    let test = builder.build(&server).await?;
    test.submit_turn(PARENT_PROMPT).await?;
    let dumps = wait_for_proxy_request_dumps(dump_dir.path())?;
    let parent = dumps
        .iter()
        .find(|dump| dump_body_contains(dump, PARENT_PROMPT))
        .ok_or_else(|| anyhow!("missing parent request dump"))?;
    let child = dumps
        .iter()
        .find(|dump| {
            dump_body_contains(dump, CHILD_PROMPT) && !dump_body_contains(dump, SPAWN_CALL_ID)
        })
        .ok_or_else(|| anyhow!("missing child request dump"))?;
    let parent_window_id = header(parent, "x-codex-window-id")
        .ok_or_else(|| anyhow!("parent request missing x-codex-window-id"))?;
    let child_window_id = header(child, "x-codex-window-id")
        .ok_or_else(|| anyhow!("child request missing x-codex-window-id"))?;
    let (parent_thread_id, parent_generation) = split_window_id(parent_window_id)?;
    let (child_thread_id, child_generation) = split_window_id(child_window_id)?;
    // Each turn is the first generation of its respective thread.
    assert_eq!(parent_generation, 0);
    assert_eq!(child_generation, 0);
    // assert_ne! prints both ids on failure, unlike the previous bare assert!.
    assert_ne!(child_thread_id, parent_thread_id);
    // Only the spawned subagent carries the subagent identity header...
    assert_eq!(header(parent, "x-openai-subagent"), None);
    assert_eq!(header(child, "x-openai-subagent"), Some("collab_spawn"));
    // ...and it must point back at the parent thread.
    assert_eq!(
        header(child, "x-codex-parent-thread-id"),
        Some(parent_thread_id)
    );
    Ok(())
}
/// Returns true when the request body decodes as UTF-8 and contains `text`.
/// Non-UTF-8 bodies never match.
fn request_body_contains(req: &wiremock::Request, text: &str) -> bool {
    match std::str::from_utf8(&req.body) {
        Ok(body) => body.contains(text),
        Err(_) => false,
    }
}
/// Polls `dump_dir` until the proxy has written at least three request dumps,
/// one of which carries the child prompt, or errors after two seconds.
///
/// Three dumps correspond to parent turn, child turn, and parent follow-up.
fn wait_for_proxy_request_dumps(dump_dir: &Path) -> Result<Vec<Value>> {
    let give_up_at = Instant::now() + Duration::from_secs(/*secs*/ 2);
    loop {
        // Read errors (e.g. partially written files) are treated as "nothing
        // yet" and retried on the next pass.
        let dumps = read_proxy_request_dumps(dump_dir).unwrap_or_default();
        let has_child_request = dumps
            .iter()
            .any(|dump| dump_body_contains(dump, CHILD_PROMPT));
        if dumps.len() >= 3 && has_child_request {
            return Ok(dumps);
        }
        if Instant::now() >= give_up_at {
            return Err(anyhow!(
                "timed out waiting for proxy request dumps, got {}",
                dumps.len()
            ));
        }
        std::thread::sleep(PROXY_POLL_INTERVAL);
    }
}
/// Reads and parses every `*-request.json` file the proxy has written to
/// `dump_dir` so far, in directory-iteration order.
///
/// # Errors
/// Fails on directory/file I/O errors or invalid JSON in a dump file.
fn read_proxy_request_dumps(dump_dir: &Path) -> Result<Vec<Value>> {
    let mut parsed = Vec::new();
    for entry in std::fs::read_dir(dump_dir)? {
        let path = entry?.path();
        let is_request_dump = path
            .file_name()
            .and_then(|name| name.to_str())
            .is_some_and(|name| name.ends_with("-request.json"));
        if is_request_dump {
            let contents = std::fs::read_to_string(&path)?;
            parsed.push(serde_json::from_str(&contents)?);
        }
    }
    Ok(parsed)
}
/// Returns true when the dumped request's `body` field, rendered as JSON
/// text, contains `text`. Dumps without a `body` field never match.
fn dump_body_contains(dump: &Value, text: &str) -> bool {
    match dump.get("body") {
        Some(body) => body.to_string().contains(text),
        None => false,
    }
}
fn header<'a>(dump: &'a Value, name: &str) -> Option<&'a str> {
dump.get("headers")?.as_array()?.iter().find_map(|header| {
(header.get("name")?.as_str()?.eq_ignore_ascii_case(name))
.then(|| header.get("value")?.as_str())
.flatten()
})
}
/// Splits an `x-codex-window-id` header of the form `<thread-id>:<generation>`
/// at the last colon, parsing the generation as a `u64`.
///
/// # Errors
/// Fails when no colon is present or the generation is not a valid `u64`.
fn split_window_id(window_id: &str) -> Result<(&str, u64)> {
    let Some((thread_id, generation_text)) = window_id.rsplit_once(':') else {
        return Err(anyhow!("invalid window id header: {window_id}"));
    };
    let generation = generation_text.parse::<u64>()?;
    Ok((thread_id, generation))
}