mirror of
https://github.com/openai/codex.git
synced 2026-05-06 06:12:59 +03:00
Merge branch 'main' into xli-codex/fix-marketplace-local-source-windows
This commit is contained in:
1
.github/workflows/rust-ci-full.yml
vendored
1
.github/workflows/rust-ci-full.yml
vendored
@@ -664,6 +664,7 @@ jobs:
|
||||
export CODEX_TEST_REMOTE_ENV_CONTAINER_NAME=codex-remote-test-env
|
||||
source "${GITHUB_WORKSPACE}/scripts/test-remote-env.sh"
|
||||
echo "CODEX_TEST_REMOTE_ENV=${CODEX_TEST_REMOTE_ENV}" >> "$GITHUB_ENV"
|
||||
echo "CODEX_TEST_REMOTE_EXEC_SERVER_URL=${CODEX_TEST_REMOTE_EXEC_SERVER_URL}" >> "$GITHUB_ENV"
|
||||
|
||||
- name: tests
|
||||
id: test
|
||||
|
||||
@@ -3,7 +3,6 @@ use crate::plugins::PluginId;
|
||||
use crate::plugins::PluginInstallRequest;
|
||||
use crate::plugins::PluginsManager;
|
||||
use crate::plugins::add_marketplace;
|
||||
use codex_utils_absolute_path::AbsolutePathBuf;
|
||||
use serde_json::Value as JsonValue;
|
||||
use std::collections::BTreeMap;
|
||||
use std::collections::HashSet;
|
||||
@@ -185,7 +184,7 @@ impl ExternalAgentConfigService {
|
||||
|repo_root| repo_root.join(".codex").join("config.toml"),
|
||||
);
|
||||
if let Some(settings) = settings.as_ref() {
|
||||
let migrated = build_config_from_external(&settings)?;
|
||||
let migrated = build_config_from_external(settings)?;
|
||||
if !is_empty_toml_table(&migrated) {
|
||||
let mut should_include = true;
|
||||
if target_config.exists() {
|
||||
@@ -352,23 +351,12 @@ impl ExternalAgentConfigService {
|
||||
let add_marketplace_outcome = add_marketplace(self.codex_home.clone(), request).await;
|
||||
let marketplace_path = match add_marketplace_outcome {
|
||||
Ok(add_marketplace_outcome) => {
|
||||
match AbsolutePathBuf::try_from(
|
||||
add_marketplace_outcome
|
||||
.installed_root
|
||||
.join(INSTALLED_MARKETPLACE_MANIFEST_RELATIVE_PATH),
|
||||
) {
|
||||
Ok(path) => {
|
||||
outcome
|
||||
.succeeded_marketplaces
|
||||
.push(marketplace_name.clone());
|
||||
path
|
||||
}
|
||||
Err(_) => {
|
||||
outcome.failed_marketplaces.push(marketplace_name);
|
||||
outcome.failed_plugin_ids.extend(plugin_ids);
|
||||
continue;
|
||||
}
|
||||
}
|
||||
outcome
|
||||
.succeeded_marketplaces
|
||||
.push(marketplace_name.clone());
|
||||
add_marketplace_outcome
|
||||
.installed_root
|
||||
.join(INSTALLED_MARKETPLACE_MANIFEST_RELATIVE_PATH)
|
||||
}
|
||||
Err(_) => {
|
||||
outcome.failed_marketplaces.push(marketplace_name);
|
||||
|
||||
@@ -139,7 +139,6 @@ pub use project_doc::discover_project_doc_paths;
|
||||
pub use project_doc::read_project_docs;
|
||||
mod rollout;
|
||||
pub(crate) mod safety;
|
||||
pub mod seatbelt;
|
||||
mod session_rollout_init_error;
|
||||
pub mod shell;
|
||||
pub(crate) mod shell_snapshot;
|
||||
|
||||
@@ -1,48 +0,0 @@
|
||||
#![cfg(target_os = "macos")]
|
||||
|
||||
use crate::spawn::CODEX_SANDBOX_ENV_VAR;
|
||||
use crate::spawn::SpawnChildRequest;
|
||||
use crate::spawn::StdioPolicy;
|
||||
use crate::spawn::spawn_child_async;
|
||||
use codex_network_proxy::NetworkProxy;
|
||||
use codex_protocol::permissions::FileSystemSandboxPolicy;
|
||||
use codex_protocol::permissions::NetworkSandboxPolicy;
|
||||
use codex_protocol::protocol::SandboxPolicy;
|
||||
use codex_sandboxing::seatbelt::MACOS_PATH_TO_SEATBELT_EXECUTABLE;
|
||||
use codex_sandboxing::seatbelt::create_seatbelt_command_args_for_policies;
|
||||
use codex_utils_absolute_path::AbsolutePathBuf;
|
||||
use std::collections::HashMap;
|
||||
use std::path::PathBuf;
|
||||
use tokio::process::Child;
|
||||
|
||||
pub async fn spawn_command_under_seatbelt(
|
||||
command: Vec<String>,
|
||||
command_cwd: AbsolutePathBuf,
|
||||
sandbox_policy: &SandboxPolicy,
|
||||
sandbox_policy_cwd: &AbsolutePathBuf,
|
||||
stdio_policy: StdioPolicy,
|
||||
network: Option<&NetworkProxy>,
|
||||
mut env: HashMap<String, String>,
|
||||
) -> std::io::Result<Child> {
|
||||
let args = create_seatbelt_command_args_for_policies(
|
||||
command,
|
||||
&FileSystemSandboxPolicy::from_legacy_sandbox_policy(sandbox_policy, sandbox_policy_cwd),
|
||||
NetworkSandboxPolicy::from(sandbox_policy),
|
||||
sandbox_policy_cwd,
|
||||
/*enforce_managed_network*/ false,
|
||||
network,
|
||||
);
|
||||
let arg0 = None;
|
||||
env.insert(CODEX_SANDBOX_ENV_VAR.to_string(), "seatbelt".to_string());
|
||||
spawn_child_async(SpawnChildRequest {
|
||||
program: PathBuf::from(MACOS_PATH_TO_SEATBELT_EXECUTABLE),
|
||||
args,
|
||||
arg0,
|
||||
cwd: command_cwd,
|
||||
network_sandbox_policy: NetworkSandboxPolicy::from(sandbox_policy),
|
||||
network,
|
||||
stdio_policy,
|
||||
env,
|
||||
})
|
||||
.await
|
||||
}
|
||||
@@ -7,8 +7,6 @@ use std::process::Command;
|
||||
use std::sync::Arc;
|
||||
use std::sync::atomic::AtomicU64;
|
||||
use std::sync::atomic::Ordering;
|
||||
use std::time::Duration;
|
||||
use std::time::Instant;
|
||||
|
||||
use anyhow::Context;
|
||||
use anyhow::Result;
|
||||
@@ -44,7 +42,6 @@ use tempfile::TempDir;
|
||||
use wiremock::MockServer;
|
||||
|
||||
use crate::PathBufExt;
|
||||
use crate::RemoteEnvConfig;
|
||||
use crate::TempDirExt;
|
||||
use crate::get_remote_test_env;
|
||||
use crate::load_default_config_for_test;
|
||||
@@ -62,50 +59,15 @@ type PreBuildHook = dyn FnOnce(&Path) + Send + 'static;
|
||||
type WorkspaceSetup = dyn FnOnce(AbsolutePathBuf, Arc<dyn ExecutorFileSystem>) -> BoxFuture<'static, Result<()>>
|
||||
+ Send;
|
||||
const TEST_MODEL_WITH_EXPERIMENTAL_TOOLS: &str = "test-gpt-5.1-codex";
|
||||
const REMOTE_EXEC_SERVER_START_TIMEOUT: Duration = Duration::from_secs(5);
|
||||
const REMOTE_EXEC_SERVER_POLL_INTERVAL: Duration = Duration::from_millis(25);
|
||||
static REMOTE_EXEC_SERVER_INSTANCE_COUNTER: AtomicU64 = AtomicU64::new(0);
|
||||
|
||||
#[derive(Debug)]
|
||||
struct RemoteExecServerProcess {
|
||||
container_name: String,
|
||||
pid: u32,
|
||||
remote_exec_server_path: String,
|
||||
stdout_path: String,
|
||||
cleanup_paths: Vec<String>,
|
||||
}
|
||||
|
||||
impl Drop for RemoteExecServerProcess {
|
||||
fn drop(&mut self) {
|
||||
let cleanup_paths = self.cleanup_paths.join(" ");
|
||||
let cleanup_paths_script = if cleanup_paths.is_empty() {
|
||||
String::new()
|
||||
} else {
|
||||
format!("rm -rf {cleanup_paths}; ")
|
||||
};
|
||||
let script = format!(
|
||||
"if kill -0 {pid} 2>/dev/null; then kill {pid}; fi; {cleanup_paths_script}rm -f {remote_exec_server_path} {stdout_path}",
|
||||
pid = self.pid,
|
||||
cleanup_paths_script = cleanup_paths_script,
|
||||
remote_exec_server_path = self.remote_exec_server_path,
|
||||
stdout_path = self.stdout_path
|
||||
);
|
||||
let _ = docker_command_capture_stdout(["exec", &self.container_name, "sh", "-lc", &script]);
|
||||
}
|
||||
}
|
||||
|
||||
impl RemoteExecServerProcess {
|
||||
fn register_cleanup_path(&mut self, path: &Path) {
|
||||
self.cleanup_paths.push(path.display().to_string());
|
||||
}
|
||||
}
|
||||
const REMOTE_EXEC_SERVER_URL_ENV_VAR: &str = "CODEX_TEST_REMOTE_EXEC_SERVER_URL";
|
||||
static REMOTE_TEST_INSTANCE_COUNTER: AtomicU64 = AtomicU64::new(0);
|
||||
|
||||
#[derive(Debug)]
|
||||
pub struct TestEnv {
|
||||
environment: codex_exec_server::Environment,
|
||||
cwd: AbsolutePathBuf,
|
||||
local_cwd_temp_dir: Option<Arc<TempDir>>,
|
||||
_remote_exec_server_process: Option<RemoteExecServerProcess>,
|
||||
remote_container_name: Option<String>,
|
||||
}
|
||||
|
||||
impl TestEnv {
|
||||
@@ -117,7 +79,7 @@ impl TestEnv {
|
||||
environment,
|
||||
cwd,
|
||||
local_cwd_temp_dir: Some(local_cwd_temp_dir),
|
||||
_remote_exec_server_process: None,
|
||||
remote_container_name: None,
|
||||
})
|
||||
}
|
||||
|
||||
@@ -138,12 +100,19 @@ impl TestEnv {
|
||||
}
|
||||
}
|
||||
|
||||
impl Drop for TestEnv {
|
||||
fn drop(&mut self) {
|
||||
if let Some(container_name) = &self.remote_container_name {
|
||||
let script = format!("rm -rf {}", self.cwd.as_path().display());
|
||||
let _ = docker_command_capture_stdout(["exec", container_name, "sh", "-lc", &script]);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
pub async fn test_env() -> Result<TestEnv> {
|
||||
match get_remote_test_env() {
|
||||
Some(remote_env) => {
|
||||
let mut remote_process = start_remote_exec_server(&remote_env)?;
|
||||
let remote_ip = remote_container_ip(&remote_env.container_name)?;
|
||||
let websocket_url = rewrite_websocket_host(&remote_process.listen_url, &remote_ip)?;
|
||||
let websocket_url = remote_exec_server_url()?;
|
||||
let environment = codex_exec_server::Environment::create(Some(websocket_url)).await?;
|
||||
let cwd = remote_aware_cwd_path();
|
||||
environment
|
||||
@@ -154,184 +123,43 @@ pub async fn test_env() -> Result<TestEnv> {
|
||||
/*sandbox*/ None,
|
||||
)
|
||||
.await?;
|
||||
remote_process.process.register_cleanup_path(cwd.as_path());
|
||||
Ok(TestEnv {
|
||||
environment,
|
||||
cwd,
|
||||
local_cwd_temp_dir: None,
|
||||
_remote_exec_server_process: Some(remote_process.process),
|
||||
remote_container_name: Some(remote_env.container_name),
|
||||
})
|
||||
}
|
||||
None => TestEnv::local().await,
|
||||
}
|
||||
}
|
||||
|
||||
struct RemoteExecServerStart {
|
||||
process: RemoteExecServerProcess,
|
||||
listen_url: String,
|
||||
}
|
||||
|
||||
fn start_remote_exec_server(remote_env: &RemoteEnvConfig) -> Result<RemoteExecServerStart> {
|
||||
let container_name = remote_env.container_name.as_str();
|
||||
let instance_id = remote_exec_server_instance_id();
|
||||
let remote_exec_server_path = format!("/tmp/codex-{instance_id}");
|
||||
let remote_linux_sandbox_path = format!("/tmp/codex-linux-sandbox-{instance_id}");
|
||||
let stdout_path = format!("/tmp/codex-exec-server-{instance_id}.stdout");
|
||||
let local_binary = codex_utils_cargo_bin::cargo_bin("codex").context("resolve codex binary")?;
|
||||
let local_linux_sandbox = codex_utils_cargo_bin::cargo_bin("codex-linux-sandbox")
|
||||
.context("resolve codex-linux-sandbox binary")?;
|
||||
let local_binary = local_binary.to_string_lossy().to_string();
|
||||
let local_linux_sandbox = local_linux_sandbox.to_string_lossy().to_string();
|
||||
let remote_binary = format!("{container_name}:{remote_exec_server_path}");
|
||||
let remote_linux_sandbox = format!("{container_name}:{remote_linux_sandbox_path}");
|
||||
|
||||
docker_command_success(["cp", &local_binary, &remote_binary])?;
|
||||
docker_command_success(["cp", &local_linux_sandbox, &remote_linux_sandbox])?;
|
||||
docker_command_success([
|
||||
"exec",
|
||||
container_name,
|
||||
"chmod",
|
||||
"+x",
|
||||
&remote_exec_server_path,
|
||||
])?;
|
||||
docker_command_success([
|
||||
"exec",
|
||||
container_name,
|
||||
"chmod",
|
||||
"+x",
|
||||
&remote_linux_sandbox_path,
|
||||
])?;
|
||||
probe_remote_linux_sandbox(container_name, &remote_linux_sandbox_path)?;
|
||||
|
||||
let start_script = format!(
|
||||
"rm -f {stdout_path}; \
|
||||
nohup {remote_exec_server_path} exec-server --listen ws://0.0.0.0:0 > {stdout_path} 2>&1 & \
|
||||
echo $!"
|
||||
);
|
||||
let pid_output =
|
||||
docker_command_capture_stdout(["exec", container_name, "sh", "-lc", &start_script])?;
|
||||
let pid = pid_output
|
||||
.trim()
|
||||
.parse::<u32>()
|
||||
.with_context(|| format!("parse remote exec-server PID from {pid_output:?}"))?;
|
||||
|
||||
let listen_url = wait_for_remote_listen_url(container_name, &stdout_path)?;
|
||||
|
||||
Ok(RemoteExecServerStart {
|
||||
process: RemoteExecServerProcess {
|
||||
container_name: container_name.to_string(),
|
||||
pid,
|
||||
remote_exec_server_path,
|
||||
stdout_path,
|
||||
cleanup_paths: vec![remote_linux_sandbox_path],
|
||||
},
|
||||
listen_url,
|
||||
})
|
||||
}
|
||||
|
||||
fn probe_remote_linux_sandbox(container_name: &str, remote_linux_sandbox_path: &str) -> Result<()> {
|
||||
let policy = serde_json::to_string(&SandboxPolicy::new_read_only_policy())
|
||||
.context("serialize remote sandbox probe policy")?;
|
||||
let probe_script = format!(
|
||||
"{remote_linux_sandbox_path} --sandbox-policy-cwd /tmp --sandbox-policy '{policy}' -- /bin/true"
|
||||
);
|
||||
let output = Command::new("docker")
|
||||
.args(["exec", container_name, "sh", "-lc", &probe_script])
|
||||
.output()
|
||||
.with_context(|| format!("probe remote linux sandbox in container `{container_name}`"))?;
|
||||
if !output.status.success() {
|
||||
return Err(anyhow!(
|
||||
"remote linux sandbox probe failed in container `{container_name}`: stdout={} stderr={}",
|
||||
String::from_utf8_lossy(&output.stdout).trim(),
|
||||
String::from_utf8_lossy(&output.stderr).trim()
|
||||
));
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
|
||||
fn remote_aware_cwd_path() -> AbsolutePathBuf {
|
||||
PathBuf::from(format!(
|
||||
"/tmp/codex-core-test-cwd-{}",
|
||||
remote_exec_server_instance_id()
|
||||
remote_test_instance_id()
|
||||
))
|
||||
.abs()
|
||||
}
|
||||
|
||||
fn wait_for_remote_listen_url(container_name: &str, stdout_path: &str) -> Result<String> {
|
||||
let deadline = Instant::now() + REMOTE_EXEC_SERVER_START_TIMEOUT;
|
||||
loop {
|
||||
let line = docker_command_capture_stdout([
|
||||
"exec",
|
||||
container_name,
|
||||
"sh",
|
||||
"-lc",
|
||||
&format!("head -n 1 {stdout_path} 2>/dev/null || true"),
|
||||
])?;
|
||||
let listen_url = line.trim();
|
||||
if listen_url.starts_with("ws://") {
|
||||
return Ok(listen_url.to_string());
|
||||
}
|
||||
|
||||
if Instant::now() >= deadline {
|
||||
return Err(anyhow!(
|
||||
"timed out waiting for remote exec-server listen URL in container `{container_name}` after {REMOTE_EXEC_SERVER_START_TIMEOUT:?}"
|
||||
));
|
||||
}
|
||||
std::thread::sleep(REMOTE_EXEC_SERVER_POLL_INTERVAL);
|
||||
fn remote_exec_server_url() -> Result<String> {
|
||||
let listen_url = std::env::var(REMOTE_EXEC_SERVER_URL_ENV_VAR).with_context(|| {
|
||||
format!("{REMOTE_EXEC_SERVER_URL_ENV_VAR} must be set for remote tests")
|
||||
})?;
|
||||
let listen_url = listen_url.trim();
|
||||
if listen_url.is_empty() {
|
||||
return Err(anyhow!(
|
||||
"{REMOTE_EXEC_SERVER_URL_ENV_VAR} must not be empty"
|
||||
));
|
||||
}
|
||||
Ok(listen_url.to_string())
|
||||
}
|
||||
|
||||
fn remote_exec_server_instance_id() -> String {
|
||||
let instance = REMOTE_EXEC_SERVER_INSTANCE_COUNTER.fetch_add(1, Ordering::Relaxed);
|
||||
fn remote_test_instance_id() -> String {
|
||||
let instance = REMOTE_TEST_INSTANCE_COUNTER.fetch_add(1, Ordering::Relaxed);
|
||||
format!("{}-{instance}", std::process::id())
|
||||
}
|
||||
|
||||
fn remote_container_ip(container_name: &str) -> Result<String> {
|
||||
let ip = docker_command_capture_stdout([
|
||||
"inspect",
|
||||
"-f",
|
||||
"{{range .NetworkSettings.Networks}}{{.IPAddress}}{{end}}",
|
||||
container_name,
|
||||
])?;
|
||||
let ip = ip.trim();
|
||||
if ip.is_empty() {
|
||||
return Err(anyhow!(
|
||||
"container `{container_name}` has no IP address; cannot connect to remote exec-server"
|
||||
));
|
||||
}
|
||||
Ok(ip.to_string())
|
||||
}
|
||||
|
||||
fn rewrite_websocket_host(listen_url: &str, host: &str) -> Result<String> {
|
||||
let Some(address) = listen_url.strip_prefix("ws://") else {
|
||||
return Err(anyhow!(
|
||||
"unexpected websocket listen URL `{listen_url}`; expected ws://IP:PORT"
|
||||
));
|
||||
};
|
||||
let Some((_, port)) = address.rsplit_once(':') else {
|
||||
return Err(anyhow!(
|
||||
"unexpected websocket listen URL `{listen_url}`; expected ws://IP:PORT"
|
||||
));
|
||||
};
|
||||
Ok(format!("ws://{host}:{port}"))
|
||||
}
|
||||
|
||||
fn docker_command_success<const N: usize>(args: [&str; N]) -> Result<()> {
|
||||
let output = Command::new("docker")
|
||||
.args(args)
|
||||
.output()
|
||||
.with_context(|| format!("run docker {args:?}"))?;
|
||||
if !output.status.success() {
|
||||
return Err(anyhow!(
|
||||
"docker {:?} failed: stdout={} stderr={}",
|
||||
args,
|
||||
String::from_utf8_lossy(&output.stdout).trim(),
|
||||
String::from_utf8_lossy(&output.stderr).trim()
|
||||
));
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
|
||||
fn docker_command_capture_stdout<const N: usize>(args: [&str; N]) -> Result<String> {
|
||||
let output = Command::new("docker")
|
||||
.args(args)
|
||||
|
||||
@@ -1,8 +1,5 @@
|
||||
#![cfg(target_os = "macos")]
|
||||
|
||||
use std::collections::HashMap;
|
||||
use std::string::ToString;
|
||||
|
||||
use codex_core::exec::ExecCapturePolicy;
|
||||
use codex_core::exec::ExecParams;
|
||||
use codex_core::exec::process_exec_tool_call;
|
||||
@@ -17,6 +14,7 @@ use codex_protocol::protocol::SandboxPolicy;
|
||||
use codex_sandboxing::SandboxType;
|
||||
use codex_sandboxing::get_platform_sandbox;
|
||||
use core_test_support::PathExt;
|
||||
use std::collections::HashMap;
|
||||
use tempfile::TempDir;
|
||||
|
||||
fn skip_test() -> bool {
|
||||
@@ -29,14 +27,18 @@ fn skip_test() -> bool {
|
||||
}
|
||||
|
||||
#[expect(clippy::expect_used)]
|
||||
async fn run_test_cmd(tmp: TempDir, cmd: Vec<&str>) -> Result<ExecToolCallOutput> {
|
||||
async fn run_test_cmd<I, S>(tmp: TempDir, command: I) -> Result<ExecToolCallOutput>
|
||||
where
|
||||
I: IntoIterator<Item = S>,
|
||||
S: Into<String>,
|
||||
{
|
||||
let sandbox_type = get_platform_sandbox(/*windows_sandbox_enabled*/ false)
|
||||
.expect("should be able to get sandbox type");
|
||||
assert_eq!(sandbox_type, SandboxType::MacosSeatbelt);
|
||||
let cwd = tmp.path().abs();
|
||||
|
||||
let params = ExecParams {
|
||||
command: cmd.iter().map(ToString::to_string).collect(),
|
||||
command: command.into_iter().map(Into::into).collect(),
|
||||
cwd: cwd.clone(),
|
||||
expiration: 1000.into(),
|
||||
capture_policy: ExecCapturePolicy::ShellTool,
|
||||
@@ -129,6 +131,37 @@ async fn exit_command_not_found_is_ok() {
|
||||
run_test_cmd(tmp, cmd).await.unwrap();
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn openpty_works_under_real_exec_seatbelt_path() {
|
||||
if skip_test() {
|
||||
return;
|
||||
}
|
||||
|
||||
let python = match which::which("python3") {
|
||||
Ok(path) => path,
|
||||
Err(_) => {
|
||||
eprintln!("python3 not found in PATH, skipping test.");
|
||||
return;
|
||||
}
|
||||
};
|
||||
|
||||
let tmp = TempDir::new().expect("should be able to create temp dir");
|
||||
let cmd = vec![
|
||||
python.to_string_lossy().into_owned(),
|
||||
"-c".to_string(),
|
||||
r#"import os
|
||||
|
||||
master, slave = os.openpty()
|
||||
os.write(slave, b"ping")
|
||||
assert os.read(master, 4) == b"ping""#
|
||||
.to_string(),
|
||||
];
|
||||
|
||||
let output = run_test_cmd(tmp, cmd).await.unwrap();
|
||||
assert_eq!(output.stdout.text, "");
|
||||
assert_eq!(output.stderr.text, "");
|
||||
}
|
||||
|
||||
/// Writing a file fails and should be considered a sandbox error
|
||||
#[tokio::test]
|
||||
async fn write_file_fails_as_sandbox_error() {
|
||||
@@ -139,7 +172,7 @@ async fn write_file_fails_as_sandbox_error() {
|
||||
let tmp = TempDir::new().expect("should be able to create temp dir");
|
||||
let path = tmp.path().join("test.txt");
|
||||
let cmd = vec![
|
||||
"/user/bin/touch",
|
||||
"/usr/bin/touch",
|
||||
path.to_str().expect("should be able to get path"),
|
||||
];
|
||||
|
||||
|
||||
@@ -85,7 +85,6 @@ mod rmcp_client;
|
||||
mod rollout_list_find;
|
||||
mod safety_check_downgrade;
|
||||
mod search_tool;
|
||||
mod seatbelt;
|
||||
mod shell_command;
|
||||
mod shell_serialization;
|
||||
mod shell_snapshot;
|
||||
|
||||
@@ -1,316 +0,0 @@
|
||||
#![cfg(target_os = "macos")]
|
||||
|
||||
//! Tests for the macOS sandboxing that are specific to Seatbelt.
|
||||
//! Tests that apply to both Mac and Linux sandboxing should go in sandbox.rs.
|
||||
|
||||
use std::collections::HashMap;
|
||||
use std::path::Path;
|
||||
use std::path::PathBuf;
|
||||
|
||||
use codex_core::seatbelt::spawn_command_under_seatbelt;
|
||||
use codex_core::spawn::CODEX_SANDBOX_ENV_VAR;
|
||||
use codex_core::spawn::StdioPolicy;
|
||||
use codex_protocol::protocol::SandboxPolicy;
|
||||
use codex_utils_absolute_path::AbsolutePathBuf;
|
||||
use tempfile::TempDir;
|
||||
|
||||
struct TestScenario {
|
||||
repo_parent: PathBuf,
|
||||
file_outside_repo: PathBuf,
|
||||
repo_root: PathBuf,
|
||||
file_in_repo_root: PathBuf,
|
||||
file_in_dot_git_dir: PathBuf,
|
||||
}
|
||||
|
||||
struct TestExpectations {
|
||||
file_outside_repo_is_writable: bool,
|
||||
file_in_repo_root_is_writable: bool,
|
||||
file_in_dot_git_dir_is_writable: bool,
|
||||
}
|
||||
|
||||
impl TestScenario {
|
||||
async fn run_test(&self, policy: &SandboxPolicy, expectations: TestExpectations) {
|
||||
if std::env::var(CODEX_SANDBOX_ENV_VAR) == Ok("seatbelt".to_string()) {
|
||||
eprintln!("{CODEX_SANDBOX_ENV_VAR} is set to 'seatbelt', skipping test.");
|
||||
return;
|
||||
}
|
||||
|
||||
assert_eq!(
|
||||
touch(&self.file_outside_repo, policy).await,
|
||||
expectations.file_outside_repo_is_writable
|
||||
);
|
||||
assert_eq!(
|
||||
self.file_outside_repo.exists(),
|
||||
expectations.file_outside_repo_is_writable
|
||||
);
|
||||
|
||||
assert_eq!(
|
||||
touch(&self.file_in_repo_root, policy).await,
|
||||
expectations.file_in_repo_root_is_writable
|
||||
);
|
||||
assert_eq!(
|
||||
self.file_in_repo_root.exists(),
|
||||
expectations.file_in_repo_root_is_writable
|
||||
);
|
||||
|
||||
assert_eq!(
|
||||
touch(&self.file_in_dot_git_dir, policy).await,
|
||||
expectations.file_in_dot_git_dir_is_writable
|
||||
);
|
||||
assert_eq!(
|
||||
self.file_in_dot_git_dir.exists(),
|
||||
expectations.file_in_dot_git_dir_is_writable
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
/// If the user has added a workspace root that is not a Git repo root, then
|
||||
/// the user has to specify `--skip-git-repo-check` or go through some
|
||||
/// interstitial that indicates they are taking on some risk because Git
|
||||
/// cannot be used to backup their work before the agent begins.
|
||||
///
|
||||
/// Because the user has agreed to this risk, we do not try find all .git
|
||||
/// folders in the workspace and block them (though we could change our
|
||||
/// position on this in the future).
|
||||
#[tokio::test]
|
||||
async fn if_parent_of_repo_is_writable_then_dot_git_folder_is_writable() {
|
||||
let tmp = TempDir::new().expect("should be able to create temp dir");
|
||||
let test_scenario = create_test_scenario(&tmp);
|
||||
let policy = SandboxPolicy::WorkspaceWrite {
|
||||
writable_roots: vec![test_scenario.repo_parent.as_path().try_into().unwrap()],
|
||||
read_only_access: Default::default(),
|
||||
network_access: false,
|
||||
exclude_tmpdir_env_var: true,
|
||||
exclude_slash_tmp: true,
|
||||
};
|
||||
|
||||
test_scenario
|
||||
.run_test(
|
||||
&policy,
|
||||
TestExpectations {
|
||||
file_outside_repo_is_writable: true,
|
||||
file_in_repo_root_is_writable: true,
|
||||
file_in_dot_git_dir_is_writable: true,
|
||||
},
|
||||
)
|
||||
.await;
|
||||
}
|
||||
|
||||
/// When the writable root is the root of a Git repository (as evidenced by the
|
||||
/// presence of a .git folder), then the .git folder should be read-only if
|
||||
/// the policy is `WorkspaceWrite`.
|
||||
#[tokio::test]
|
||||
async fn if_git_repo_is_writable_root_then_dot_git_folder_is_read_only() {
|
||||
let tmp = TempDir::new().expect("should be able to create temp dir");
|
||||
let test_scenario = create_test_scenario(&tmp);
|
||||
let policy = SandboxPolicy::WorkspaceWrite {
|
||||
writable_roots: vec![test_scenario.repo_root.as_path().try_into().unwrap()],
|
||||
read_only_access: Default::default(),
|
||||
network_access: false,
|
||||
exclude_tmpdir_env_var: true,
|
||||
exclude_slash_tmp: true,
|
||||
};
|
||||
|
||||
test_scenario
|
||||
.run_test(
|
||||
&policy,
|
||||
TestExpectations {
|
||||
file_outside_repo_is_writable: false,
|
||||
file_in_repo_root_is_writable: true,
|
||||
file_in_dot_git_dir_is_writable: false,
|
||||
},
|
||||
)
|
||||
.await;
|
||||
}
|
||||
|
||||
/// Under DangerFullAccess, all writes should be permitted anywhere on disk,
|
||||
/// including inside the .git folder.
|
||||
#[tokio::test]
|
||||
async fn danger_full_access_allows_all_writes() {
|
||||
let tmp = TempDir::new().expect("should be able to create temp dir");
|
||||
let test_scenario = create_test_scenario(&tmp);
|
||||
let policy = SandboxPolicy::DangerFullAccess;
|
||||
|
||||
test_scenario
|
||||
.run_test(
|
||||
&policy,
|
||||
TestExpectations {
|
||||
file_outside_repo_is_writable: true,
|
||||
file_in_repo_root_is_writable: true,
|
||||
file_in_dot_git_dir_is_writable: true,
|
||||
},
|
||||
)
|
||||
.await;
|
||||
}
|
||||
|
||||
/// Under ReadOnly, writes should not be permitted anywhere on disk.
|
||||
#[tokio::test]
|
||||
async fn read_only_forbids_all_writes() {
|
||||
let tmp = TempDir::new().expect("should be able to create temp dir");
|
||||
let test_scenario = create_test_scenario(&tmp);
|
||||
let policy = SandboxPolicy::new_read_only_policy();
|
||||
|
||||
test_scenario
|
||||
.run_test(
|
||||
&policy,
|
||||
TestExpectations {
|
||||
file_outside_repo_is_writable: false,
|
||||
file_in_repo_root_is_writable: false,
|
||||
file_in_dot_git_dir_is_writable: false,
|
||||
},
|
||||
)
|
||||
.await;
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn openpty_works_under_seatbelt() {
|
||||
if std::env::var(CODEX_SANDBOX_ENV_VAR) == Ok("seatbelt".to_string()) {
|
||||
eprintln!("{CODEX_SANDBOX_ENV_VAR} is set to 'seatbelt', skipping test.");
|
||||
return;
|
||||
}
|
||||
|
||||
if which::which("python3").is_err() {
|
||||
eprintln!("python3 not found in PATH, skipping test.");
|
||||
return;
|
||||
}
|
||||
|
||||
let policy = SandboxPolicy::new_read_only_policy();
|
||||
let command_cwd = AbsolutePathBuf::current_dir().expect("getcwd");
|
||||
let sandbox_cwd = command_cwd.clone();
|
||||
|
||||
let mut child = spawn_command_under_seatbelt(
|
||||
vec![
|
||||
"python3".to_string(),
|
||||
"-c".to_string(),
|
||||
r#"import os
|
||||
|
||||
master, slave = os.openpty()
|
||||
os.write(slave, b"ping")
|
||||
assert os.read(master, 4) == b"ping""#
|
||||
.to_string(),
|
||||
],
|
||||
command_cwd,
|
||||
&policy,
|
||||
&sandbox_cwd,
|
||||
StdioPolicy::RedirectForShellTool,
|
||||
/*network*/ None,
|
||||
HashMap::new(),
|
||||
)
|
||||
.await
|
||||
.expect("should be able to spawn python under seatbelt");
|
||||
|
||||
let status = child
|
||||
.wait()
|
||||
.await
|
||||
.expect("should be able to wait for child process");
|
||||
assert!(status.success(), "python exited with {status:?}");
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn java_home_finds_runtime_under_seatbelt() {
|
||||
if std::env::var(CODEX_SANDBOX_ENV_VAR) == Ok("seatbelt".to_string()) {
|
||||
eprintln!("{CODEX_SANDBOX_ENV_VAR} is set to 'seatbelt', skipping test.");
|
||||
return;
|
||||
}
|
||||
|
||||
let java_home_path = Path::new("/usr/libexec/java_home");
|
||||
if !java_home_path.exists() {
|
||||
eprintln!("/usr/libexec/java_home is not present, skipping test.");
|
||||
return;
|
||||
}
|
||||
|
||||
let baseline_output = tokio::process::Command::new(java_home_path)
|
||||
.env_remove("JAVA_HOME")
|
||||
.output()
|
||||
.await
|
||||
.expect("should be able to invoke java_home outside seatbelt");
|
||||
if !baseline_output.status.success() {
|
||||
eprintln!(
|
||||
"java_home exited with {:?} outside seatbelt, skipping test",
|
||||
baseline_output.status
|
||||
);
|
||||
return;
|
||||
}
|
||||
|
||||
let policy = SandboxPolicy::new_read_only_policy();
|
||||
let command_cwd = AbsolutePathBuf::current_dir().expect("getcwd");
|
||||
let sandbox_cwd = command_cwd.clone();
|
||||
|
||||
let mut env: HashMap<String, String> = std::env::vars().collect();
|
||||
env.remove("JAVA_HOME");
|
||||
env.remove(CODEX_SANDBOX_ENV_VAR);
|
||||
|
||||
let child = spawn_command_under_seatbelt(
|
||||
vec![java_home_path.to_string_lossy().to_string()],
|
||||
command_cwd,
|
||||
&policy,
|
||||
&sandbox_cwd,
|
||||
StdioPolicy::RedirectForShellTool,
|
||||
/*network*/ None,
|
||||
env,
|
||||
)
|
||||
.await
|
||||
.expect("should be able to spawn java_home under seatbelt");
|
||||
|
||||
let output = child
|
||||
.wait_with_output()
|
||||
.await
|
||||
.expect("should be able to wait for java_home child");
|
||||
assert!(
|
||||
output.status.success(),
|
||||
"java_home under seatbelt exited with {:?}, stderr: {}",
|
||||
output.status,
|
||||
String::from_utf8_lossy(&output.stderr)
|
||||
);
|
||||
|
||||
let stdout = String::from_utf8_lossy(&output.stdout);
|
||||
assert!(
|
||||
!stdout.trim().is_empty(),
|
||||
"java_home stdout unexpectedly empty under seatbelt"
|
||||
);
|
||||
}
|
||||
|
||||
#[expect(clippy::expect_used)]
|
||||
fn create_test_scenario(tmp: &TempDir) -> TestScenario {
|
||||
let repo_parent = tmp.path().to_path_buf();
|
||||
let repo_root = repo_parent.join("repo");
|
||||
let dot_git_dir = repo_root.join(".git");
|
||||
|
||||
std::fs::create_dir(&repo_root).expect("should be able to create repo root");
|
||||
std::fs::create_dir(&dot_git_dir).expect("should be able to create .git dir");
|
||||
|
||||
TestScenario {
|
||||
file_outside_repo: repo_parent.join("outside.txt"),
|
||||
repo_parent,
|
||||
file_in_repo_root: repo_root.join("repo_file.txt"),
|
||||
repo_root,
|
||||
file_in_dot_git_dir: dot_git_dir.join("dot_git_file.txt"),
|
||||
}
|
||||
}
|
||||
|
||||
#[expect(clippy::expect_used)]
|
||||
/// Note that `path` must be absolute.
|
||||
async fn touch(path: &Path, policy: &SandboxPolicy) -> bool {
|
||||
assert!(path.is_absolute(), "Path must be absolute: {path:?}");
|
||||
let command_cwd = AbsolutePathBuf::current_dir().expect("getcwd");
|
||||
let sandbox_cwd = command_cwd.clone();
|
||||
let mut child = spawn_command_under_seatbelt(
|
||||
vec![
|
||||
"/usr/bin/touch".to_string(),
|
||||
path.to_string_lossy().to_string(),
|
||||
],
|
||||
command_cwd,
|
||||
policy,
|
||||
&sandbox_cwd,
|
||||
StdioPolicy::RedirectForShellTool,
|
||||
/*network*/ None,
|
||||
HashMap::new(),
|
||||
)
|
||||
.await
|
||||
.expect("should be able to spawn command under seatbelt");
|
||||
child
|
||||
.wait()
|
||||
.await
|
||||
.expect("should be able to wait for child process")
|
||||
.success()
|
||||
}
|
||||
@@ -19,17 +19,67 @@ async fn spawn_command_under_sandbox(
|
||||
stdio_policy: StdioPolicy,
|
||||
env: HashMap<String, String>,
|
||||
) -> std::io::Result<Child> {
|
||||
use codex_core::seatbelt::spawn_command_under_seatbelt;
|
||||
spawn_command_under_seatbelt(
|
||||
command,
|
||||
command_cwd,
|
||||
use codex_core::exec::ExecCapturePolicy;
|
||||
use codex_core::exec::ExecParams;
|
||||
use codex_core::exec::build_exec_request;
|
||||
use codex_core::sandboxing::SandboxPermissions;
|
||||
use codex_protocol::config_types::WindowsSandboxLevel;
|
||||
use codex_protocol::permissions::FileSystemSandboxPolicy;
|
||||
use codex_protocol::permissions::NetworkSandboxPolicy;
|
||||
use std::process::Stdio;
|
||||
|
||||
let codex_linux_sandbox_exe = None;
|
||||
let exec_request = build_exec_request(
|
||||
ExecParams {
|
||||
command,
|
||||
cwd: command_cwd,
|
||||
expiration: 1000.into(),
|
||||
capture_policy: ExecCapturePolicy::ShellTool,
|
||||
env,
|
||||
network: None,
|
||||
sandbox_permissions: SandboxPermissions::UseDefault,
|
||||
windows_sandbox_level: WindowsSandboxLevel::Disabled,
|
||||
windows_sandbox_private_desktop: false,
|
||||
justification: None,
|
||||
arg0: None,
|
||||
},
|
||||
sandbox_policy,
|
||||
&FileSystemSandboxPolicy::from_legacy_sandbox_policy(sandbox_policy, sandbox_cwd),
|
||||
NetworkSandboxPolicy::from(sandbox_policy),
|
||||
sandbox_cwd,
|
||||
stdio_policy,
|
||||
/*network*/ None,
|
||||
env,
|
||||
&codex_linux_sandbox_exe,
|
||||
/*use_legacy_landlock*/ false,
|
||||
)
|
||||
.await
|
||||
.map_err(|err| io::Error::other(err.to_string()))?;
|
||||
|
||||
let (program, args) = exec_request
|
||||
.command
|
||||
.split_first()
|
||||
.ok_or_else(|| io::Error::new(io::ErrorKind::InvalidInput, "command args are empty"))?;
|
||||
|
||||
let mut child = tokio::process::Command::new(program);
|
||||
if let Some(arg0) = exec_request.arg0.as_deref() {
|
||||
child.arg0(arg0);
|
||||
}
|
||||
child.args(args);
|
||||
child.current_dir(exec_request.cwd);
|
||||
child.env_clear();
|
||||
child.envs(exec_request.env);
|
||||
|
||||
match stdio_policy {
|
||||
StdioPolicy::RedirectForShellTool => {
|
||||
child.stdin(Stdio::null());
|
||||
child.stdout(Stdio::piped()).stderr(Stdio::piped());
|
||||
}
|
||||
StdioPolicy::Inherit => {
|
||||
child
|
||||
.stdin(Stdio::inherit())
|
||||
.stdout(Stdio::inherit())
|
||||
.stderr(Stdio::inherit());
|
||||
}
|
||||
}
|
||||
|
||||
child.kill_on_drop(true).spawn()
|
||||
}
|
||||
|
||||
#[cfg(target_os = "linux")]
|
||||
|
||||
@@ -18,6 +18,11 @@ is_sourced() {
|
||||
setup_remote_env() {
|
||||
local container_name
|
||||
local codex_binary_path
|
||||
local container_ip
|
||||
local remote_codex_path
|
||||
local remote_exec_server_pid
|
||||
local remote_exec_server_port
|
||||
local remote_exec_server_stdout_path
|
||||
|
||||
container_name="${CODEX_TEST_REMOTE_ENV_CONTAINER_NAME:-codex-remote-test-env-local-$(date +%s)-${RANDOM}}"
|
||||
codex_binary_path="${REPO_ROOT}/codex-rs/target/debug/codex"
|
||||
@@ -59,14 +64,58 @@ setup_remote_env() {
|
||||
return 1
|
||||
fi
|
||||
|
||||
if [[ -z "${CODEX_TEST_REMOTE_EXEC_SERVER_URL:-}" ]]; then
|
||||
remote_codex_path="/tmp/codex-remote-env/codex"
|
||||
remote_exec_server_port="31987"
|
||||
remote_exec_server_stdout_path="/tmp/codex-remote-env/exec-server.stdout"
|
||||
docker exec "${container_name}" sh -lc "mkdir -p /tmp/codex-remote-env"
|
||||
docker cp "${codex_binary_path}" "${container_name}:${remote_codex_path}"
|
||||
docker exec "${container_name}" chmod +x "${remote_codex_path}"
|
||||
remote_exec_server_pid="$(
|
||||
docker exec "${container_name}" sh -lc \
|
||||
"rm -f ${remote_exec_server_stdout_path}; nohup ${remote_codex_path} exec-server --listen ws://0.0.0.0:${remote_exec_server_port} > ${remote_exec_server_stdout_path} 2>&1 & echo \$!"
|
||||
)"
|
||||
wait_for_remote_exec_server_port "${container_name}" "${remote_exec_server_port}" "${remote_exec_server_stdout_path}"
|
||||
container_ip="$(
|
||||
docker inspect -f '{{range .NetworkSettings.Networks}}{{.IPAddress}}{{end}}' "${container_name}"
|
||||
)"
|
||||
if [[ -z "${container_ip}" ]]; then
|
||||
echo "container ${container_name} has no IP address" >&2
|
||||
docker rm -f "${container_name}" >/dev/null 2>&1 || true
|
||||
return 1
|
||||
fi
|
||||
export CODEX_TEST_REMOTE_EXEC_SERVER_PID="${remote_exec_server_pid}"
|
||||
export CODEX_TEST_REMOTE_EXEC_SERVER_URL="ws://${container_ip}:${remote_exec_server_port}"
|
||||
fi
|
||||
|
||||
export CODEX_TEST_REMOTE_ENV="${container_name}"
|
||||
}
|
||||
|
||||
wait_for_remote_exec_server_port() {
|
||||
local container_name="$1"
|
||||
local port="$2"
|
||||
local stdout_path="$3"
|
||||
local deadline=$((SECONDS + 5))
|
||||
|
||||
while (( SECONDS < deadline )); do
|
||||
if docker exec "${container_name}" python3 -c "import socket; socket.create_connection(('127.0.0.1', ${port}), timeout=0.2).close()" >/dev/null 2>&1; then
|
||||
return 0
|
||||
fi
|
||||
sleep 0.025
|
||||
done
|
||||
|
||||
echo "timed out waiting for remote exec-server on ${container_name}:${port}" >&2
|
||||
docker exec "${container_name}" sh -lc "cat ${stdout_path} 2>/dev/null || true" >&2 || true
|
||||
return 1
|
||||
}
|
||||
|
||||
codex_remote_env_cleanup() {
|
||||
if [[ -n "${CODEX_TEST_REMOTE_ENV:-}" ]]; then
|
||||
docker rm -f "${CODEX_TEST_REMOTE_ENV}" >/dev/null 2>&1 || true
|
||||
unset CODEX_TEST_REMOTE_ENV
|
||||
fi
|
||||
unset CODEX_TEST_REMOTE_EXEC_SERVER_PID
|
||||
unset CODEX_TEST_REMOTE_EXEC_SERVER_URL
|
||||
}
|
||||
|
||||
if ! is_sourced; then
|
||||
@@ -79,6 +128,7 @@ set -euo pipefail
|
||||
if setup_remote_env; then
|
||||
status=0
|
||||
echo "CODEX_TEST_REMOTE_ENV=${CODEX_TEST_REMOTE_ENV}"
|
||||
echo "CODEX_TEST_REMOTE_EXEC_SERVER_URL=${CODEX_TEST_REMOTE_EXEC_SERVER_URL}"
|
||||
echo "Remote env ready. Run your command, then call: codex_remote_env_cleanup"
|
||||
else
|
||||
status=$?
|
||||
|
||||
Reference in New Issue
Block a user