fix(linux-sandbox): enforce root read carveouts in bwrap

This commit is contained in:
viyatb-oai
2026-03-07 22:14:56 -08:00
parent aec2a01f7a
commit e7eccd035a
5 changed files with 250 additions and 46 deletions

View File

@@ -10,6 +10,8 @@
//! - seccomp + `PR_SET_NO_NEW_PRIVS` applied in-process, and
//! - bubblewrap used to construct the filesystem view before exec.
use std::collections::BTreeSet;
use std::fs::File;
use std::os::fd::AsRawFd;
use std::path::Path;
use std::path::PathBuf;
@@ -77,6 +79,12 @@ impl BwrapNetworkMode {
}
}
#[derive(Debug)]
pub(crate) struct BwrapArgs {
pub args: Vec<String>,
pub preserved_files: Vec<File>,
}
/// Wrap a command with bubblewrap so the filesystem is read-only by default,
/// with explicit writable roots and read-only subpaths layered afterward.
///
@@ -84,15 +92,18 @@ impl BwrapNetworkMode {
/// returns `command` unchanged so we avoid unnecessary sandboxing overhead.
/// If network isolation is requested, we still wrap with bubblewrap so network
/// namespace restrictions apply while preserving full filesystem access.
pub(crate) fn create_bwrap_command_args_for_policy(
pub(crate) fn create_bwrap_command_args(
command: Vec<String>,
file_system_sandbox_policy: &FileSystemSandboxPolicy,
cwd: &Path,
options: BwrapOptions,
) -> Result<Vec<String>> {
) -> Result<BwrapArgs> {
if file_system_sandbox_policy.has_full_disk_write_access() {
return if options.network_mode == BwrapNetworkMode::FullAccess {
Ok(command)
Ok(BwrapArgs {
args: command,
preserved_files: Vec::new(),
})
} else {
Ok(create_bwrap_flags_full_filesystem(command, options))
};
@@ -101,7 +112,7 @@ pub(crate) fn create_bwrap_command_args_for_policy(
create_bwrap_flags(command, file_system_sandbox_policy, cwd, options)
}
fn create_bwrap_flags_full_filesystem(command: Vec<String>, options: BwrapOptions) -> Vec<String> {
fn create_bwrap_flags_full_filesystem(command: Vec<String>, options: BwrapOptions) -> BwrapArgs {
let mut args = vec![
"--new-session".to_string(),
"--die-with-parent".to_string(),
@@ -122,7 +133,10 @@ fn create_bwrap_flags_full_filesystem(command: Vec<String>, options: BwrapOption
}
args.push("--".to_string());
args.extend(command);
args
BwrapArgs {
args,
preserved_files: Vec::new(),
}
}
/// Build the bubblewrap flags (everything after `argv[0]`).
@@ -131,11 +145,15 @@ fn create_bwrap_flags(
file_system_sandbox_policy: &FileSystemSandboxPolicy,
cwd: &Path,
options: BwrapOptions,
) -> Result<Vec<String>> {
) -> Result<BwrapArgs> {
let BwrapArgs {
args: filesystem_args,
preserved_files,
} = create_filesystem_args(file_system_sandbox_policy, cwd)?;
let mut args = Vec::new();
args.push("--new-session".to_string());
args.push("--die-with-parent".to_string());
args.extend(create_filesystem_args(file_system_sandbox_policy, cwd)?);
args.extend(filesystem_args);
// Request a user namespace explicitly rather than relying on bubblewrap's
// auto-enable behavior, which is skipped when the caller runs as uid 0.
args.push("--unshare-user".to_string());
@@ -151,25 +169,32 @@ fn create_bwrap_flags(
}
args.push("--".to_string());
args.extend(command);
Ok(args)
Ok(BwrapArgs {
args,
preserved_files,
})
}
/// Build the bubblewrap filesystem mounts for a given filesystem policy.
///
/// The mount order is important:
/// 1. Full-read policies use `--ro-bind / /`; restricted-read policies start
/// from `--tmpfs /` and layer scoped `--ro-bind` mounts.
/// 1. Full-read policies, and restricted policies that explicitly read `/`,
/// use `--ro-bind / /`; other restricted-read policies start from
/// `--tmpfs /` and layer scoped `--ro-bind` mounts.
/// 2. `--dev /dev` mounts a minimal writable `/dev` with standard device nodes
/// (including `/dev/urandom`) even under a read-only root.
/// 3. `--bind <root> <root>` re-enables writes for allowed roots, including
/// writable subpaths under `/dev` (for example, `/dev/shm`).
/// 4. `--ro-bind <subpath> <subpath>` re-applies read-only protections under
/// those writable roots so protected subpaths win.
/// 5. Explicit unreadable roots are masked last so deny carveouts still win
/// even when the readable baseline includes `/`.
fn create_filesystem_args(
file_system_sandbox_policy: &FileSystemSandboxPolicy,
cwd: &Path,
) -> Result<Vec<String>> {
) -> Result<BwrapArgs> {
let writable_roots = file_system_sandbox_policy.get_writable_roots_with_cwd(cwd);
let unreadable_roots = file_system_sandbox_policy.get_unreadable_roots_with_cwd(cwd);
ensure_mount_targets_exist(&writable_roots)?;
let mut args = if file_system_sandbox_policy.has_full_disk_read_access() {
@@ -210,7 +235,8 @@ fn create_filesystem_args(
}
// A restricted policy can still explicitly request `/`, which is
// semantically equivalent to broad read access.
// the broad read baseline. Explicit unreadable carveouts are
// re-applied later.
if readable_roots.iter().any(|root| root == Path::new("/")) {
args = vec![
"--ro-bind".to_string(),
@@ -232,6 +258,7 @@ fn create_filesystem_args(
args
};
let mut preserved_files = Vec::new();
for writable_root in &writable_roots {
let root = writable_root.root.as_path();
@@ -275,7 +302,34 @@ fn create_filesystem_args(
}
}
Ok(args)
if !unreadable_roots.is_empty() {
let null_file = File::open("/dev/null")?;
let null_fd = null_file.as_raw_fd().to_string();
for unreadable_root in unreadable_roots {
let unreadable_root = unreadable_root.as_path();
if unreadable_root.is_dir() {
args.push("--perms".to_string());
args.push("000".to_string());
args.push("--tmpfs".to_string());
args.push(path_to_string(unreadable_root));
args.push("--remount-ro".to_string());
args.push(path_to_string(unreadable_root));
continue;
}
args.push("--perms".to_string());
args.push("000".to_string());
args.push("--ro-bind-data".to_string());
args.push(null_fd.clone());
args.push(path_to_string(unreadable_root));
}
preserved_files.push(null_file);
}
Ok(BwrapArgs {
args,
preserved_files,
})
}
/// Collect unique read-only subpaths across all writable roots.
@@ -394,6 +448,7 @@ mod tests {
use codex_protocol::protocol::FileSystemPath;
use codex_protocol::protocol::FileSystemSandboxEntry;
use codex_protocol::protocol::FileSystemSandboxPolicy;
use codex_protocol::protocol::FileSystemSpecialPath;
use codex_protocol::protocol::ReadOnlyAccess;
use codex_protocol::protocol::SandboxPolicy;
use codex_utils_absolute_path::AbsolutePathBuf;
@@ -403,7 +458,7 @@ mod tests {
#[test]
fn full_disk_write_full_network_returns_unwrapped_command() {
let command = vec!["/bin/true".to_string()];
let args = create_bwrap_command_args_for_policy(
let args = create_bwrap_command_args(
command.clone(),
&FileSystemSandboxPolicy::from(&SandboxPolicy::DangerFullAccess),
Path::new("/"),
@@ -414,13 +469,13 @@ mod tests {
)
.expect("create bwrap args");
assert_eq!(args, command);
assert_eq!(args.args, command);
}
#[test]
fn full_disk_write_proxy_only_keeps_full_filesystem_but_unshares_network() {
let command = vec!["/bin/true".to_string()];
let args = create_bwrap_command_args_for_policy(
let args = create_bwrap_command_args(
command,
&FileSystemSandboxPolicy::from(&SandboxPolicy::DangerFullAccess),
Path::new("/"),
@@ -432,7 +487,7 @@ mod tests {
.expect("create bwrap args");
assert_eq!(
args,
args.args,
vec![
"--new-session".to_string(),
"--die-with-parent".to_string(),
@@ -466,7 +521,7 @@ mod tests {
)
.expect("bwrap fs args");
assert_eq!(
args,
args.args,
vec![
"--ro-bind".to_string(),
"/".to_string(),
@@ -503,10 +558,10 @@ mod tests {
let args = create_filesystem_args(&FileSystemSandboxPolicy::from(&policy), temp_dir.path())
.expect("filesystem args");
assert_eq!(args[0..4], ["--tmpfs", "/", "--dev", "/dev"]);
assert_eq!(args.args[0..4], ["--tmpfs", "/", "--dev", "/dev"]);
let readable_root_str = path_to_string(&readable_root);
assert!(args.windows(3).any(|window| {
assert!(args.args.windows(3).any(|window| {
window
== [
"--ro-bind",
@@ -533,11 +588,15 @@ mod tests {
let args = create_filesystem_args(&FileSystemSandboxPolicy::from(&policy), temp_dir.path())
.expect("filesystem args");
assert!(args.starts_with(&["--tmpfs".to_string(), "/".to_string()]));
assert!(
args.args
.starts_with(&["--tmpfs".to_string(), "/".to_string()])
);
if Path::new("/usr").exists() {
assert!(
args.windows(3)
args.args
.windows(3)
.any(|window| window == ["--ro-bind", "/usr", "/usr"])
);
}
@@ -571,7 +630,7 @@ mod tests {
let writable_root_str = path_to_string(writable_root.as_path());
let blocked_str = path_to_string(blocked.as_path());
assert!(args.windows(3).any(|window| {
assert!(args.args.windows(3).any(|window| {
window
== [
"--bind",
@@ -580,9 +639,84 @@ mod tests {
]
}));
assert!(
args.windows(3).any(|window| {
args.args.windows(3).any(|window| {
window == ["--ro-bind", blocked_str.as_str(), blocked_str.as_str()]
})
);
}
#[test]
fn split_policy_masks_root_read_directory_carveouts() {
let temp_dir = TempDir::new().expect("temp dir");
let blocked = temp_dir.path().join("blocked");
std::fs::create_dir_all(&blocked).expect("create blocked dir");
let blocked = AbsolutePathBuf::from_absolute_path(&blocked).expect("absolute blocked dir");
let policy = FileSystemSandboxPolicy::restricted(vec![
FileSystemSandboxEntry {
path: FileSystemPath::Special {
value: FileSystemSpecialPath::Root,
},
access: FileSystemAccessMode::Read,
},
FileSystemSandboxEntry {
path: FileSystemPath::Path {
path: blocked.clone(),
},
access: FileSystemAccessMode::None,
},
]);
let args = create_filesystem_args(&policy, temp_dir.path()).expect("filesystem args");
let blocked_str = path_to_string(blocked.as_path());
assert!(
args.args
.windows(3)
.any(|window| window == ["--ro-bind", "/", "/"])
);
assert!(
args.args
.windows(4)
.any(|window| { window == ["--perms", "000", "--tmpfs", blocked_str.as_str()] })
);
assert!(
args.args
.windows(2)
.any(|window| window == ["--remount-ro", blocked_str.as_str()])
);
}
#[test]
fn split_policy_masks_root_read_file_carveouts() {
let temp_dir = TempDir::new().expect("temp dir");
let blocked_file = temp_dir.path().join("blocked.txt");
std::fs::write(&blocked_file, "secret").expect("create blocked file");
let blocked_file =
AbsolutePathBuf::from_absolute_path(&blocked_file).expect("absolute blocked file");
let policy = FileSystemSandboxPolicy::restricted(vec![
FileSystemSandboxEntry {
path: FileSystemPath::Special {
value: FileSystemSpecialPath::Root,
},
access: FileSystemAccessMode::Read,
},
FileSystemSandboxEntry {
path: FileSystemPath::Path {
path: blocked_file.clone(),
},
access: FileSystemAccessMode::None,
},
]);
let args = create_filesystem_args(&policy, temp_dir.path()).expect("filesystem args");
let blocked_file_str = path_to_string(blocked_file.as_path());
assert_eq!(args.preserved_files.len(), 1);
assert!(args.args.windows(5).any(|window| {
window[0] == "--perms"
&& window[1] == "000"
&& window[2] == "--ro-bind-data"
&& window[4] == blocked_file_str
}));
}
}