Merge branch 'main' into xli-codex/fix-marketplace-local-source-windows

2026-05-06 06:12:59 +03:00 · 2026-04-14 20:56:33 -07:00
parent 7d239adf60 d34bc66466
commit e278e01348
10 changed files with 183 additions and 599 deletions
--- a/.github/workflows/rust-ci-full.yml
+++ b/.github/workflows/rust-ci-full.yml
@@ -664,6 +664,7 @@ jobs:
          export CODEX_TEST_REMOTE_ENV_CONTAINER_NAME=codex-remote-test-env
          source "${GITHUB_WORKSPACE}/scripts/test-remote-env.sh"
          echo "CODEX_TEST_REMOTE_ENV=${CODEX_TEST_REMOTE_ENV}" >> "$GITHUB_ENV"
+          echo "CODEX_TEST_REMOTE_EXEC_SERVER_URL=${CODEX_TEST_REMOTE_EXEC_SERVER_URL}" >> "$GITHUB_ENV"

      - name: tests
        id: test
--- a/codex-rs/core/src/external_agent_config.rs
+++ b/codex-rs/core/src/external_agent_config.rs
@@ -3,7 +3,6 @@ use crate::plugins::PluginId;
 use crate::plugins::PluginInstallRequest;
 use crate::plugins::PluginsManager;
 use crate::plugins::add_marketplace;
-use codex_utils_absolute_path::AbsolutePathBuf;
 use serde_json::Value as JsonValue;
 use std::collections::BTreeMap;
 use std::collections::HashSet;
@@ -185,7 +184,7 @@ impl ExternalAgentConfigService {
            |repo_root| repo_root.join(".codex").join("config.toml"),
        );
        if let Some(settings) = settings.as_ref() {
-            let migrated = build_config_from_external(&settings)?;
+            let migrated = build_config_from_external(settings)?;
            if !is_empty_toml_table(&migrated) {
                let mut should_include = true;
                if target_config.exists() {
@@ -352,23 +351,12 @@ impl ExternalAgentConfigService {
            let add_marketplace_outcome = add_marketplace(self.codex_home.clone(), request).await;
            let marketplace_path = match add_marketplace_outcome {
                Ok(add_marketplace_outcome) => {
-                    match AbsolutePathBuf::try_from(
-                        add_marketplace_outcome
-                            .installed_root
-                            .join(INSTALLED_MARKETPLACE_MANIFEST_RELATIVE_PATH),
-                    ) {
-                        Ok(path) => {
-                            outcome
-                                .succeeded_marketplaces
-                                .push(marketplace_name.clone());
-                            path
-                        }
-                        Err(_) => {
-                            outcome.failed_marketplaces.push(marketplace_name);
-                            outcome.failed_plugin_ids.extend(plugin_ids);
-                            continue;
-                        }
-                    }
+                    outcome
+                        .succeeded_marketplaces
+                        .push(marketplace_name.clone());
+                    add_marketplace_outcome
+                        .installed_root
+                        .join(INSTALLED_MARKETPLACE_MANIFEST_RELATIVE_PATH)
                }
                Err(_) => {
                    outcome.failed_marketplaces.push(marketplace_name);
--- a/codex-rs/core/src/lib.rs
+++ b/codex-rs/core/src/lib.rs
@@ -139,7 +139,6 @@ pub use project_doc::discover_project_doc_paths;
 pub use project_doc::read_project_docs;
 mod rollout;
 pub(crate) mod safety;
-pub mod seatbelt;
 mod session_rollout_init_error;
 pub mod shell;
 pub(crate) mod shell_snapshot;
--- a/codex-rs/core/src/seatbelt.rs
+++ b/codex-rs/core/src/seatbelt.rs
@@ -1,48 +0,0 @@
-#![cfg(target_os = "macos")]
-
-use crate::spawn::CODEX_SANDBOX_ENV_VAR;
-use crate::spawn::SpawnChildRequest;
-use crate::spawn::StdioPolicy;
-use crate::spawn::spawn_child_async;
-use codex_network_proxy::NetworkProxy;
-use codex_protocol::permissions::FileSystemSandboxPolicy;
-use codex_protocol::permissions::NetworkSandboxPolicy;
-use codex_protocol::protocol::SandboxPolicy;
-use codex_sandboxing::seatbelt::MACOS_PATH_TO_SEATBELT_EXECUTABLE;
-use codex_sandboxing::seatbelt::create_seatbelt_command_args_for_policies;
-use codex_utils_absolute_path::AbsolutePathBuf;
-use std::collections::HashMap;
-use std::path::PathBuf;
-use tokio::process::Child;
-
-pub async fn spawn_command_under_seatbelt(
-    command: Vec<String>,
-    command_cwd: AbsolutePathBuf,
-    sandbox_policy: &SandboxPolicy,
-    sandbox_policy_cwd: &AbsolutePathBuf,
-    stdio_policy: StdioPolicy,
-    network: Option<&NetworkProxy>,
-    mut env: HashMap<String, String>,
-) -> std::io::Result<Child> {
-    let args = create_seatbelt_command_args_for_policies(
-        command,
-        &FileSystemSandboxPolicy::from_legacy_sandbox_policy(sandbox_policy, sandbox_policy_cwd),
-        NetworkSandboxPolicy::from(sandbox_policy),
-        sandbox_policy_cwd,
-        /*enforce_managed_network*/ false,
-        network,
-    );
-    let arg0 = None;
-    env.insert(CODEX_SANDBOX_ENV_VAR.to_string(), "seatbelt".to_string());
-    spawn_child_async(SpawnChildRequest {
-        program: PathBuf::from(MACOS_PATH_TO_SEATBELT_EXECUTABLE),
-        args,
-        arg0,
-        cwd: command_cwd,
-        network_sandbox_policy: NetworkSandboxPolicy::from(sandbox_policy),
-        network,
-        stdio_policy,
-        env,
-    })
-    .await
-}
--- a/codex-rs/core/tests/common/test_codex.rs
+++ b/codex-rs/core/tests/common/test_codex.rs
@@ -7,8 +7,6 @@ use std::process::Command;
 use std::sync::Arc;
 use std::sync::atomic::AtomicU64;
 use std::sync::atomic::Ordering;
-use std::time::Duration;
-use std::time::Instant;

 use anyhow::Context;
 use anyhow::Result;
@@ -44,7 +42,6 @@ use tempfile::TempDir;
 use wiremock::MockServer;

 use crate::PathBufExt;
-use crate::RemoteEnvConfig;
 use crate::TempDirExt;
 use crate::get_remote_test_env;
 use crate::load_default_config_for_test;
@@ -62,50 +59,15 @@ type PreBuildHook = dyn FnOnce(&Path) + Send + 'static;
 type WorkspaceSetup = dyn FnOnce(AbsolutePathBuf, Arc<dyn ExecutorFileSystem>) -> BoxFuture<'static, Result<()>>
    + Send;
 const TEST_MODEL_WITH_EXPERIMENTAL_TOOLS: &str = "test-gpt-5.1-codex";
-const REMOTE_EXEC_SERVER_START_TIMEOUT: Duration = Duration::from_secs(5);
-const REMOTE_EXEC_SERVER_POLL_INTERVAL: Duration = Duration::from_millis(25);
-static REMOTE_EXEC_SERVER_INSTANCE_COUNTER: AtomicU64 = AtomicU64::new(0);
-
-#[derive(Debug)]
-struct RemoteExecServerProcess {
-    container_name: String,
-    pid: u32,
-    remote_exec_server_path: String,
-    stdout_path: String,
-    cleanup_paths: Vec<String>,
-}
-
-impl Drop for RemoteExecServerProcess {
-    fn drop(&mut self) {
-        let cleanup_paths = self.cleanup_paths.join(" ");
-        let cleanup_paths_script = if cleanup_paths.is_empty() {
-            String::new()
-        } else {
-            format!("rm -rf {cleanup_paths}; ")
-        };
-        let script = format!(
-            "if kill -0 {pid} 2>/dev/null; then kill {pid}; fi; {cleanup_paths_script}rm -f {remote_exec_server_path} {stdout_path}",
-            pid = self.pid,
-            cleanup_paths_script = cleanup_paths_script,
-            remote_exec_server_path = self.remote_exec_server_path,
-            stdout_path = self.stdout_path
-        );
-        let _ = docker_command_capture_stdout(["exec", &self.container_name, "sh", "-lc", &script]);
-    }
-}
-
-impl RemoteExecServerProcess {
-    fn register_cleanup_path(&mut self, path: &Path) {
-        self.cleanup_paths.push(path.display().to_string());
-    }
-}
+const REMOTE_EXEC_SERVER_URL_ENV_VAR: &str = "CODEX_TEST_REMOTE_EXEC_SERVER_URL";
+static REMOTE_TEST_INSTANCE_COUNTER: AtomicU64 = AtomicU64::new(0);

 #[derive(Debug)]
 pub struct TestEnv {
    environment: codex_exec_server::Environment,
    cwd: AbsolutePathBuf,
    local_cwd_temp_dir: Option<Arc<TempDir>>,
-    _remote_exec_server_process: Option<RemoteExecServerProcess>,
+    remote_container_name: Option<String>,
 }

 impl TestEnv {
@@ -117,7 +79,7 @@ impl TestEnv {
            environment,
            cwd,
            local_cwd_temp_dir: Some(local_cwd_temp_dir),
-            _remote_exec_server_process: None,
+            remote_container_name: None,
        })
    }

@@ -138,12 +100,19 @@ impl TestEnv {
    }
 }

+impl Drop for TestEnv {
+    fn drop(&mut self) {
+        if let Some(container) = self.remote_container_name.as_deref() {
+            let cleanup = format!("rm -rf {}", self.cwd.as_path().display()); // best-effort cleanup
+            let _ = docker_command_capture_stdout(["exec", container, "sh", "-lc", &cleanup]);
+        }
+    }
+}
+
 pub async fn test_env() -> Result<TestEnv> {
    match get_remote_test_env() {
        Some(remote_env) => {
-            let mut remote_process = start_remote_exec_server(&remote_env)?;
-            let remote_ip = remote_container_ip(&remote_env.container_name)?;
-            let websocket_url = rewrite_websocket_host(&remote_process.listen_url, &remote_ip)?;
+            let websocket_url = remote_exec_server_url()?;
            let environment = codex_exec_server::Environment::create(Some(websocket_url)).await?;
            let cwd = remote_aware_cwd_path();
            environment
@@ -154,184 +123,43 @@ pub async fn test_env() -> Result<TestEnv> {
                    /*sandbox*/ None,
                )
                .await?;
-            remote_process.process.register_cleanup_path(cwd.as_path());
            Ok(TestEnv {
                environment,
                cwd,
                local_cwd_temp_dir: None,
-                _remote_exec_server_process: Some(remote_process.process),
+                remote_container_name: Some(remote_env.container_name),
            })
        }
        None => TestEnv::local().await,
    }
 }

-struct RemoteExecServerStart {
-    process: RemoteExecServerProcess,
-    listen_url: String,
-}
-
-fn start_remote_exec_server(remote_env: &RemoteEnvConfig) -> Result<RemoteExecServerStart> {
-    let container_name = remote_env.container_name.as_str();
-    let instance_id = remote_exec_server_instance_id();
-    let remote_exec_server_path = format!("/tmp/codex-{instance_id}");
-    let remote_linux_sandbox_path = format!("/tmp/codex-linux-sandbox-{instance_id}");
-    let stdout_path = format!("/tmp/codex-exec-server-{instance_id}.stdout");
-    let local_binary = codex_utils_cargo_bin::cargo_bin("codex").context("resolve codex binary")?;
-    let local_linux_sandbox = codex_utils_cargo_bin::cargo_bin("codex-linux-sandbox")
-        .context("resolve codex-linux-sandbox binary")?;
-    let local_binary = local_binary.to_string_lossy().to_string();
-    let local_linux_sandbox = local_linux_sandbox.to_string_lossy().to_string();
-    let remote_binary = format!("{container_name}:{remote_exec_server_path}");
-    let remote_linux_sandbox = format!("{container_name}:{remote_linux_sandbox_path}");
-
-    docker_command_success(["cp", &local_binary, &remote_binary])?;
-    docker_command_success(["cp", &local_linux_sandbox, &remote_linux_sandbox])?;
-    docker_command_success([
-        "exec",
-        container_name,
-        "chmod",
-        "+x",
-        &remote_exec_server_path,
-    ])?;
-    docker_command_success([
-        "exec",
-        container_name,
-        "chmod",
-        "+x",
-        &remote_linux_sandbox_path,
-    ])?;
-    probe_remote_linux_sandbox(container_name, &remote_linux_sandbox_path)?;
-
-    let start_script = format!(
-        "rm -f {stdout_path}; \
-nohup {remote_exec_server_path} exec-server --listen ws://0.0.0.0:0 > {stdout_path} 2>&1 & \
-echo $!"
-    );
-    let pid_output =
-        docker_command_capture_stdout(["exec", container_name, "sh", "-lc", &start_script])?;
-    let pid = pid_output
-        .trim()
-        .parse::<u32>()
-        .with_context(|| format!("parse remote exec-server PID from {pid_output:?}"))?;
-
-    let listen_url = wait_for_remote_listen_url(container_name, &stdout_path)?;
-
-    Ok(RemoteExecServerStart {
-        process: RemoteExecServerProcess {
-            container_name: container_name.to_string(),
-            pid,
-            remote_exec_server_path,
-            stdout_path,
-            cleanup_paths: vec![remote_linux_sandbox_path],
-        },
-        listen_url,
-    })
-}
-
-fn probe_remote_linux_sandbox(container_name: &str, remote_linux_sandbox_path: &str) -> Result<()> {
-    let policy = serde_json::to_string(&SandboxPolicy::new_read_only_policy())
-        .context("serialize remote sandbox probe policy")?;
-    let probe_script = format!(
-        "{remote_linux_sandbox_path} --sandbox-policy-cwd /tmp --sandbox-policy '{policy}' -- /bin/true"
-    );
-    let output = Command::new("docker")
-        .args(["exec", container_name, "sh", "-lc", &probe_script])
-        .output()
-        .with_context(|| format!("probe remote linux sandbox in container `{container_name}`"))?;
-    if !output.status.success() {
-        return Err(anyhow!(
-            "remote linux sandbox probe failed in container `{container_name}`: stdout={} stderr={}",
-            String::from_utf8_lossy(&output.stdout).trim(),
-            String::from_utf8_lossy(&output.stderr).trim()
-        ));
-    }
-    Ok(())
-}
-
 fn remote_aware_cwd_path() -> AbsolutePathBuf {
    PathBuf::from(format!(
        "/tmp/codex-core-test-cwd-{}",
-        remote_exec_server_instance_id()
+        remote_test_instance_id()
    ))
    .abs()
 }

-fn wait_for_remote_listen_url(container_name: &str, stdout_path: &str) -> Result<String> {
-    let deadline = Instant::now() + REMOTE_EXEC_SERVER_START_TIMEOUT;
-    loop {
-        let line = docker_command_capture_stdout([
-            "exec",
-            container_name,
-            "sh",
-            "-lc",
-            &format!("head -n 1 {stdout_path} 2>/dev/null || true"),
-        ])?;
-        let listen_url = line.trim();
-        if listen_url.starts_with("ws://") {
-            return Ok(listen_url.to_string());
-        }
-
-        if Instant::now() >= deadline {
-            return Err(anyhow!(
-                "timed out waiting for remote exec-server listen URL in container `{container_name}` after {REMOTE_EXEC_SERVER_START_TIMEOUT:?}"
-            ));
-        }
-        std::thread::sleep(REMOTE_EXEC_SERVER_POLL_INTERVAL);
+/// Reads the exec-server websocket URL for remote tests from the environment,
+/// trimming surrounding whitespace and rejecting an unset or blank value.
+fn remote_exec_server_url() -> Result<String> {
+    let raw = std::env::var(REMOTE_EXEC_SERVER_URL_ENV_VAR).with_context(|| {
+        format!("{REMOTE_EXEC_SERVER_URL_ENV_VAR} must be set for remote tests")
+    })?;
+    let url = raw.trim();
+    if url.is_empty() {
+        return Err(anyhow!("{REMOTE_EXEC_SERVER_URL_ENV_VAR} must not be empty"));
    }
+    Ok(url.to_string())
 }

-fn remote_exec_server_instance_id() -> String {
-    let instance = REMOTE_EXEC_SERVER_INSTANCE_COUNTER.fetch_add(1, Ordering::Relaxed);
+fn remote_test_instance_id() -> String {
+    let instance = REMOTE_TEST_INSTANCE_COUNTER.fetch_add(1, Ordering::Relaxed);
    format!("{}-{instance}", std::process::id())
 }

-fn remote_container_ip(container_name: &str) -> Result<String> {
-    let ip = docker_command_capture_stdout([
-        "inspect",
-        "-f",
-        "{{range .NetworkSettings.Networks}}{{.IPAddress}}{{end}}",
-        container_name,
-    ])?;
-    let ip = ip.trim();
-    if ip.is_empty() {
-        return Err(anyhow!(
-            "container `{container_name}` has no IP address; cannot connect to remote exec-server"
-        ));
-    }
-    Ok(ip.to_string())
-}
-
-fn rewrite_websocket_host(listen_url: &str, host: &str) -> Result<String> {
-    let Some(address) = listen_url.strip_prefix("ws://") else {
-        return Err(anyhow!(
-            "unexpected websocket listen URL `{listen_url}`; expected ws://IP:PORT"
-        ));
-    };
-    let Some((_, port)) = address.rsplit_once(':') else {
-        return Err(anyhow!(
-            "unexpected websocket listen URL `{listen_url}`; expected ws://IP:PORT"
-        ));
-    };
-    Ok(format!("ws://{host}:{port}"))
-}
-
-fn docker_command_success<const N: usize>(args: [&str; N]) -> Result<()> {
-    let output = Command::new("docker")
-        .args(args)
-        .output()
-        .with_context(|| format!("run docker {args:?}"))?;
-    if !output.status.success() {
-        return Err(anyhow!(
-            "docker {:?} failed: stdout={} stderr={}",
-            args,
-            String::from_utf8_lossy(&output.stdout).trim(),
-            String::from_utf8_lossy(&output.stderr).trim()
-        ));
-    }
-    Ok(())
-}
-
 fn docker_command_capture_stdout<const N: usize>(args: [&str; N]) -> Result<String> {
    let output = Command::new("docker")
        .args(args)
--- a/codex-rs/core/tests/suite/exec.rs
+++ b/codex-rs/core/tests/suite/exec.rs
@@ -1,8 +1,5 @@
 #![cfg(target_os = "macos")]

-use std::collections::HashMap;
-use std::string::ToString;
-
 use codex_core::exec::ExecCapturePolicy;
 use codex_core::exec::ExecParams;
 use codex_core::exec::process_exec_tool_call;
@@ -17,6 +14,7 @@ use codex_protocol::protocol::SandboxPolicy;
 use codex_sandboxing::SandboxType;
 use codex_sandboxing::get_platform_sandbox;
 use core_test_support::PathExt;
+use std::collections::HashMap;
 use tempfile::TempDir;

 fn skip_test() -> bool {
@@ -29,14 +27,18 @@ fn skip_test() -> bool {
 }

 #[expect(clippy::expect_used)]
-async fn run_test_cmd(tmp: TempDir, cmd: Vec<&str>) -> Result<ExecToolCallOutput> {
+async fn run_test_cmd<I, S>(tmp: TempDir, command: I) -> Result<ExecToolCallOutput>
+where
+    I: IntoIterator<Item = S>,
+    S: Into<String>,
+{
    let sandbox_type = get_platform_sandbox(/*windows_sandbox_enabled*/ false)
        .expect("should be able to get sandbox type");
    assert_eq!(sandbox_type, SandboxType::MacosSeatbelt);
    let cwd = tmp.path().abs();

    let params = ExecParams {
-        command: cmd.iter().map(ToString::to_string).collect(),
+        command: command.into_iter().map(Into::into).collect(),
        cwd: cwd.clone(),
        expiration: 1000.into(),
        capture_policy: ExecCapturePolicy::ShellTool,
@@ -129,6 +131,37 @@ async fn exit_command_not_found_is_ok() {
    run_test_cmd(tmp, cmd).await.unwrap();
 }

+/// Runs a Python `os.openpty()` round trip through `run_test_cmd` and expects
+/// it to return `Ok` with empty stdout and stderr. Returns early when
+/// `skip_test()` is true or `python3` is not found on `PATH`.
+#[tokio::test]
+async fn openpty_works_under_real_exec_seatbelt_path() {
+    if skip_test() {
+        return;
+    }
+
+    let Ok(python) = which::which("python3") else {
+        eprintln!("python3 not found in PATH, skipping test.");
+        return;
+    };
+
+    let sandbox_dir = TempDir::new().expect("should be able to create temp dir");
+    let command = vec![
+        python.to_string_lossy().into_owned(),
+        "-c".to_string(),
+        r#"import os
+
+master, slave = os.openpty()
+os.write(slave, b"ping")
+assert os.read(master, 4) == b"ping""#
+            .to_string(),
+    ];
+
+    let output = run_test_cmd(sandbox_dir, command).await.unwrap();
+    assert_eq!(output.stdout.text, "");
+    assert_eq!(output.stderr.text, "");
+}
+
 /// Writing a file fails and should be considered a sandbox error
 #[tokio::test]
 async fn write_file_fails_as_sandbox_error() {
@@ -139,7 +172,7 @@ async fn write_file_fails_as_sandbox_error() {
    let tmp = TempDir::new().expect("should be able to create temp dir");
    let path = tmp.path().join("test.txt");
    let cmd = vec![
-        "/user/bin/touch",
+        "/usr/bin/touch",
        path.to_str().expect("should be able to get path"),
    ];

--- a/codex-rs/core/tests/suite/mod.rs
+++ b/codex-rs/core/tests/suite/mod.rs
@@ -85,7 +85,6 @@ mod rmcp_client;
 mod rollout_list_find;
 mod safety_check_downgrade;
 mod search_tool;
-mod seatbelt;
 mod shell_command;
 mod shell_serialization;
 mod shell_snapshot;
--- a/codex-rs/core/tests/suite/seatbelt.rs
+++ b/codex-rs/core/tests/suite/seatbelt.rs
@@ -1,316 +0,0 @@
-#![cfg(target_os = "macos")]
-
-//! Tests for the macOS sandboxing that are specific to Seatbelt.
-//! Tests that apply to both Mac and Linux sandboxing should go in sandbox.rs.
-
-use std::collections::HashMap;
-use std::path::Path;
-use std::path::PathBuf;
-
-use codex_core::seatbelt::spawn_command_under_seatbelt;
-use codex_core::spawn::CODEX_SANDBOX_ENV_VAR;
-use codex_core::spawn::StdioPolicy;
-use codex_protocol::protocol::SandboxPolicy;
-use codex_utils_absolute_path::AbsolutePathBuf;
-use tempfile::TempDir;
-
-struct TestScenario {
-    repo_parent: PathBuf,
-    file_outside_repo: PathBuf,
-    repo_root: PathBuf,
-    file_in_repo_root: PathBuf,
-    file_in_dot_git_dir: PathBuf,
-}
-
-struct TestExpectations {
-    file_outside_repo_is_writable: bool,
-    file_in_repo_root_is_writable: bool,
-    file_in_dot_git_dir_is_writable: bool,
-}
-
-impl TestScenario {
-    async fn run_test(&self, policy: &SandboxPolicy, expectations: TestExpectations) {
-        if std::env::var(CODEX_SANDBOX_ENV_VAR) == Ok("seatbelt".to_string()) {
-            eprintln!("{CODEX_SANDBOX_ENV_VAR} is set to 'seatbelt', skipping test.");
-            return;
-        }
-
-        assert_eq!(
-            touch(&self.file_outside_repo, policy).await,
-            expectations.file_outside_repo_is_writable
-        );
-        assert_eq!(
-            self.file_outside_repo.exists(),
-            expectations.file_outside_repo_is_writable
-        );
-
-        assert_eq!(
-            touch(&self.file_in_repo_root, policy).await,
-            expectations.file_in_repo_root_is_writable
-        );
-        assert_eq!(
-            self.file_in_repo_root.exists(),
-            expectations.file_in_repo_root_is_writable
-        );
-
-        assert_eq!(
-            touch(&self.file_in_dot_git_dir, policy).await,
-            expectations.file_in_dot_git_dir_is_writable
-        );
-        assert_eq!(
-            self.file_in_dot_git_dir.exists(),
-            expectations.file_in_dot_git_dir_is_writable
-        );
-    }
-}
-
-/// If the user has added a workspace root that is not a Git repo root, then
-/// the user has to specify `--skip-git-repo-check` or go through some
-/// interstitial that indicates they are taking on some risk because Git
-/// cannot be used to backup their work before the agent begins.
-///
-/// Because the user has agreed to this risk, we do not try find all .git
-/// folders in the workspace and block them (though we could change our
-/// position on this in the future).
-#[tokio::test]
-async fn if_parent_of_repo_is_writable_then_dot_git_folder_is_writable() {
-    let tmp = TempDir::new().expect("should be able to create temp dir");
-    let test_scenario = create_test_scenario(&tmp);
-    let policy = SandboxPolicy::WorkspaceWrite {
-        writable_roots: vec![test_scenario.repo_parent.as_path().try_into().unwrap()],
-        read_only_access: Default::default(),
-        network_access: false,
-        exclude_tmpdir_env_var: true,
-        exclude_slash_tmp: true,
-    };
-
-    test_scenario
-        .run_test(
-            &policy,
-            TestExpectations {
-                file_outside_repo_is_writable: true,
-                file_in_repo_root_is_writable: true,
-                file_in_dot_git_dir_is_writable: true,
-            },
-        )
-        .await;
-}
-
-/// When the writable root is the root of a Git repository (as evidenced by the
-/// presence of a .git folder), then the .git folder should be read-only if
-/// the policy is `WorkspaceWrite`.
-#[tokio::test]
-async fn if_git_repo_is_writable_root_then_dot_git_folder_is_read_only() {
-    let tmp = TempDir::new().expect("should be able to create temp dir");
-    let test_scenario = create_test_scenario(&tmp);
-    let policy = SandboxPolicy::WorkspaceWrite {
-        writable_roots: vec![test_scenario.repo_root.as_path().try_into().unwrap()],
-        read_only_access: Default::default(),
-        network_access: false,
-        exclude_tmpdir_env_var: true,
-        exclude_slash_tmp: true,
-    };
-
-    test_scenario
-        .run_test(
-            &policy,
-            TestExpectations {
-                file_outside_repo_is_writable: false,
-                file_in_repo_root_is_writable: true,
-                file_in_dot_git_dir_is_writable: false,
-            },
-        )
-        .await;
-}
-
-/// Under DangerFullAccess, all writes should be permitted anywhere on disk,
-/// including inside the .git folder.
-#[tokio::test]
-async fn danger_full_access_allows_all_writes() {
-    let tmp = TempDir::new().expect("should be able to create temp dir");
-    let test_scenario = create_test_scenario(&tmp);
-    let policy = SandboxPolicy::DangerFullAccess;
-
-    test_scenario
-        .run_test(
-            &policy,
-            TestExpectations {
-                file_outside_repo_is_writable: true,
-                file_in_repo_root_is_writable: true,
-                file_in_dot_git_dir_is_writable: true,
-            },
-        )
-        .await;
-}
-
-/// Under ReadOnly, writes should not be permitted anywhere on disk.
-#[tokio::test]
-async fn read_only_forbids_all_writes() {
-    let tmp = TempDir::new().expect("should be able to create temp dir");
-    let test_scenario = create_test_scenario(&tmp);
-    let policy = SandboxPolicy::new_read_only_policy();
-
-    test_scenario
-        .run_test(
-            &policy,
-            TestExpectations {
-                file_outside_repo_is_writable: false,
-                file_in_repo_root_is_writable: false,
-                file_in_dot_git_dir_is_writable: false,
-            },
-        )
-        .await;
-}
-
-#[tokio::test]
-async fn openpty_works_under_seatbelt() {
-    if std::env::var(CODEX_SANDBOX_ENV_VAR) == Ok("seatbelt".to_string()) {
-        eprintln!("{CODEX_SANDBOX_ENV_VAR} is set to 'seatbelt', skipping test.");
-        return;
-    }
-
-    if which::which("python3").is_err() {
-        eprintln!("python3 not found in PATH, skipping test.");
-        return;
-    }
-
-    let policy = SandboxPolicy::new_read_only_policy();
-    let command_cwd = AbsolutePathBuf::current_dir().expect("getcwd");
-    let sandbox_cwd = command_cwd.clone();
-
-    let mut child = spawn_command_under_seatbelt(
-        vec![
-            "python3".to_string(),
-            "-c".to_string(),
-            r#"import os
-
-master, slave = os.openpty()
-os.write(slave, b"ping")
-assert os.read(master, 4) == b"ping""#
-                .to_string(),
-        ],
-        command_cwd,
-        &policy,
-        &sandbox_cwd,
-        StdioPolicy::RedirectForShellTool,
-        /*network*/ None,
-        HashMap::new(),
-    )
-    .await
-    .expect("should be able to spawn python under seatbelt");
-
-    let status = child
-        .wait()
-        .await
-        .expect("should be able to wait for child process");
-    assert!(status.success(), "python exited with {status:?}");
-}
-
-#[tokio::test]
-async fn java_home_finds_runtime_under_seatbelt() {
-    if std::env::var(CODEX_SANDBOX_ENV_VAR) == Ok("seatbelt".to_string()) {
-        eprintln!("{CODEX_SANDBOX_ENV_VAR} is set to 'seatbelt', skipping test.");
-        return;
-    }
-
-    let java_home_path = Path::new("/usr/libexec/java_home");
-    if !java_home_path.exists() {
-        eprintln!("/usr/libexec/java_home is not present, skipping test.");
-        return;
-    }
-
-    let baseline_output = tokio::process::Command::new(java_home_path)
-        .env_remove("JAVA_HOME")
-        .output()
-        .await
-        .expect("should be able to invoke java_home outside seatbelt");
-    if !baseline_output.status.success() {
-        eprintln!(
-            "java_home exited with {:?} outside seatbelt, skipping test",
-            baseline_output.status
-        );
-        return;
-    }
-
-    let policy = SandboxPolicy::new_read_only_policy();
-    let command_cwd = AbsolutePathBuf::current_dir().expect("getcwd");
-    let sandbox_cwd = command_cwd.clone();
-
-    let mut env: HashMap<String, String> = std::env::vars().collect();
-    env.remove("JAVA_HOME");
-    env.remove(CODEX_SANDBOX_ENV_VAR);
-
-    let child = spawn_command_under_seatbelt(
-        vec![java_home_path.to_string_lossy().to_string()],
-        command_cwd,
-        &policy,
-        &sandbox_cwd,
-        StdioPolicy::RedirectForShellTool,
-        /*network*/ None,
-        env,
-    )
-    .await
-    .expect("should be able to spawn java_home under seatbelt");
-
-    let output = child
-        .wait_with_output()
-        .await
-        .expect("should be able to wait for java_home child");
-    assert!(
-        output.status.success(),
-        "java_home under seatbelt exited with {:?}, stderr: {}",
-        output.status,
-        String::from_utf8_lossy(&output.stderr)
-    );
-
-    let stdout = String::from_utf8_lossy(&output.stdout);
-    assert!(
-        !stdout.trim().is_empty(),
-        "java_home stdout unexpectedly empty under seatbelt"
-    );
-}
-
-#[expect(clippy::expect_used)]
-fn create_test_scenario(tmp: &TempDir) -> TestScenario {
-    let repo_parent = tmp.path().to_path_buf();
-    let repo_root = repo_parent.join("repo");
-    let dot_git_dir = repo_root.join(".git");
-
-    std::fs::create_dir(&repo_root).expect("should be able to create repo root");
-    std::fs::create_dir(&dot_git_dir).expect("should be able to create .git dir");
-
-    TestScenario {
-        file_outside_repo: repo_parent.join("outside.txt"),
-        repo_parent,
-        file_in_repo_root: repo_root.join("repo_file.txt"),
-        repo_root,
-        file_in_dot_git_dir: dot_git_dir.join("dot_git_file.txt"),
-    }
-}
-
-#[expect(clippy::expect_used)]
-/// Note that `path` must be absolute.
-async fn touch(path: &Path, policy: &SandboxPolicy) -> bool {
-    assert!(path.is_absolute(), "Path must be absolute: {path:?}");
-    let command_cwd = AbsolutePathBuf::current_dir().expect("getcwd");
-    let sandbox_cwd = command_cwd.clone();
-    let mut child = spawn_command_under_seatbelt(
-        vec![
-            "/usr/bin/touch".to_string(),
-            path.to_string_lossy().to_string(),
-        ],
-        command_cwd,
-        policy,
-        &sandbox_cwd,
-        StdioPolicy::RedirectForShellTool,
-        /*network*/ None,
-        HashMap::new(),
-    )
-    .await
-    .expect("should be able to spawn command under seatbelt");
-    child
-        .wait()
-        .await
-        .expect("should be able to wait for child process")
-        .success()
-}
--- a/codex-rs/exec/tests/suite/sandbox.rs
+++ b/codex-rs/exec/tests/suite/sandbox.rs
@@ -19,17 +19,67 @@ async fn spawn_command_under_sandbox(
    stdio_policy: StdioPolicy,
    env: HashMap<String, String>,
 ) -> std::io::Result<Child> {
-    use codex_core::seatbelt::spawn_command_under_seatbelt;
-    spawn_command_under_seatbelt(
-        command,
-        command_cwd,
+    use codex_core::exec::ExecCapturePolicy;
+    use codex_core::exec::ExecParams;
+    use codex_core::exec::build_exec_request;
+    use codex_core::sandboxing::SandboxPermissions;
+    use codex_protocol::config_types::WindowsSandboxLevel;
+    use codex_protocol::permissions::FileSystemSandboxPolicy;
+    use codex_protocol::permissions::NetworkSandboxPolicy;
+    use std::process::Stdio;
+
+    let codex_linux_sandbox_exe = None;
+    let exec_request = build_exec_request(
+        ExecParams {
+            command,
+            cwd: command_cwd,
+            expiration: 1000.into(),
+            capture_policy: ExecCapturePolicy::ShellTool,
+            env,
+            network: None,
+            sandbox_permissions: SandboxPermissions::UseDefault,
+            windows_sandbox_level: WindowsSandboxLevel::Disabled,
+            windows_sandbox_private_desktop: false,
+            justification: None,
+            arg0: None,
+        },
        sandbox_policy,
+        &FileSystemSandboxPolicy::from_legacy_sandbox_policy(sandbox_policy, sandbox_cwd),
+        NetworkSandboxPolicy::from(sandbox_policy),
        sandbox_cwd,
-        stdio_policy,
-        /*network*/ None,
-        env,
+        &codex_linux_sandbox_exe,
+        /*use_legacy_landlock*/ false,
    )
-    .await
+    .map_err(|err| io::Error::other(err.to_string()))?;
+
+    let (program, args) = exec_request
+        .command
+        .split_first()
+        .ok_or_else(|| io::Error::new(io::ErrorKind::InvalidInput, "command args are empty"))?;
+
+    let mut child = tokio::process::Command::new(program);
+    if let Some(arg0) = exec_request.arg0.as_deref() {
+        child.arg0(arg0);
+    }
+    child.args(args);
+    child.current_dir(exec_request.cwd);
+    child.env_clear();
+    child.envs(exec_request.env);
+
+    match stdio_policy {
+        StdioPolicy::RedirectForShellTool => {
+            child.stdin(Stdio::null());
+            child.stdout(Stdio::piped()).stderr(Stdio::piped());
+        }
+        StdioPolicy::Inherit => {
+            child
+                .stdin(Stdio::inherit())
+                .stdout(Stdio::inherit())
+                .stderr(Stdio::inherit());
+        }
+    }
+
+    child.kill_on_drop(true).spawn()
 }

 #[cfg(target_os = "linux")]
--- a/scripts/test-remote-env.sh
+++ b/scripts/test-remote-env.sh
@@ -18,6 +18,11 @@ is_sourced() {
 setup_remote_env() {
  local container_name
  local codex_binary_path
+  local container_ip
+  local remote_codex_path
+  local remote_exec_server_pid
+  local remote_exec_server_port
+  local remote_exec_server_stdout_path

  container_name="${CODEX_TEST_REMOTE_ENV_CONTAINER_NAME:-codex-remote-test-env-local-$(date +%s)-${RANDOM}}"
  codex_binary_path="${REPO_ROOT}/codex-rs/target/debug/codex"
@@ -59,14 +64,58 @@ setup_remote_env() {
    return 1
  fi

+  # Start an exec-server inside the container unless the caller already
+  # supplied one through CODEX_TEST_REMOTE_EXEC_SERVER_URL.
+  if [[ -z "${CODEX_TEST_REMOTE_EXEC_SERVER_URL:-}" ]]; then
+    remote_codex_path="/tmp/codex-remote-env/codex"
+    remote_exec_server_port="31987"
+    remote_exec_server_stdout_path="/tmp/codex-remote-env/exec-server.stdout"
+    # setup_remote_env is invoked as an `if` condition, where `set -e` is
+    # suppressed, so each step is checked explicitly. Without this, a failed
+    # copy or a server that never starts listening would go unnoticed and a
+    # URL pointing at nothing would be exported.
+    if ! {
+      docker exec "${container_name}" sh -lc "mkdir -p /tmp/codex-remote-env" &&
+        docker cp "${codex_binary_path}" "${container_name}:${remote_codex_path}" &&
+        docker exec "${container_name}" chmod +x "${remote_codex_path}" &&
+        remote_exec_server_pid="$(
+          docker exec "${container_name}" sh -lc \
+            "rm -f ${remote_exec_server_stdout_path}; nohup ${remote_codex_path} exec-server --listen ws://0.0.0.0:${remote_exec_server_port} > ${remote_exec_server_stdout_path} 2>&1 & echo \$!"
+        )" &&
+        wait_for_remote_exec_server_port "${container_name}" "${remote_exec_server_port}" "${remote_exec_server_stdout_path}"
+    }; then
+      echo "failed to start remote exec-server in container ${container_name}" >&2
+      # Same cleanup as the no-IP failure path below.
+      docker rm -f "${container_name}" >/dev/null 2>&1 || true
+      return 1
+    fi
+    # The server listens on 0.0.0.0 inside the container; look up the
+    # container's address to build the URL that gets exported.
+    container_ip="$(
+      docker inspect -f '{{range .NetworkSettings.Networks}}{{.IPAddress}}{{end}}' "${container_name}"
+    )"
+    if [[ -z "${container_ip}" ]]; then
+      echo "container ${container_name} has no IP address" >&2
+      docker rm -f "${container_name}" >/dev/null 2>&1 || true
+      return 1
+    fi
+    export CODEX_TEST_REMOTE_EXEC_SERVER_PID="${remote_exec_server_pid}"
+    export CODEX_TEST_REMOTE_EXEC_SERVER_URL="ws://${container_ip}:${remote_exec_server_port}"
+  fi
+
  export CODEX_TEST_REMOTE_ENV="${container_name}"
 }

+# Poll until something accepts TCP connections on 127.0.0.1:<port> inside the
+# container, or give up once the timeout elapses.
+#
+# Arguments:
+#   $1  container name passed to `docker exec`
+#   $2  TCP port to probe on the container's loopback interface
+#   $3  path (inside the container) of the server's output file, dumped on
+#       timeout
+#   $4  optional timeout in whole seconds (default: 5)
+#
+# Returns 0 as soon as a connection succeeds. On timeout, prints a diagnostic
+# and the contents of the output file to stderr, then returns 1.
+wait_for_remote_exec_server_port() {
+  local container_name="$1"
+  local port="$2"
+  local stdout_path="$3"
+  local timeout_secs="${4:-5}"
+  local deadline=$((SECONDS + timeout_secs))
+
+  while (( SECONDS < deadline )); do
+    # The port is passed to Python as an argument rather than spliced into the
+    # program text. The probe requires python3 inside the container.
+    if docker exec "${container_name}" python3 -c "import socket, sys; socket.create_connection(('127.0.0.1', int(sys.argv[1])), timeout=0.2).close()" "${port}" >/dev/null 2>&1; then
+      return 0
+    fi
+    sleep 0.025
+  done
+
+  echo "timed out waiting for remote exec-server on ${container_name}:${port}" >&2
+  # Best-effort dump of the server output; the path is passed as "$1" so it is
+  # not re-parsed by the inner shell, and failures here are ignored.
+  docker exec "${container_name}" sh -lc 'cat "$1" 2>/dev/null || true' sh "${stdout_path}" >&2 || true
+  return 1
+}
+
 codex_remote_env_cleanup() {
  if [[ -n "${CODEX_TEST_REMOTE_ENV:-}" ]]; then
    docker rm -f "${CODEX_TEST_REMOTE_ENV}" >/dev/null 2>&1 || true
    unset CODEX_TEST_REMOTE_ENV
  fi
+  unset CODEX_TEST_REMOTE_EXEC_SERVER_PID
+  unset CODEX_TEST_REMOTE_EXEC_SERVER_URL
 }

 if ! is_sourced; then
@@ -79,6 +128,7 @@ set -euo pipefail
 if setup_remote_env; then
  status=0
  echo "CODEX_TEST_REMOTE_ENV=${CODEX_TEST_REMOTE_ENV}"
+  echo "CODEX_TEST_REMOTE_EXEC_SERVER_URL=${CODEX_TEST_REMOTE_EXEC_SERVER_URL}"
  echo "Remote env ready. Run your command, then call: codex_remote_env_cleanup"
 else
  status=$?