refactor: delete exec-server and move execve wrapper into shell-escalation (#12632)

## Why

We already plan to remove the shell-tool MCP path, and doing that
cleanup first makes the follow-on `shell-escalation` work much simpler.

This change removes the last remaining reason to keep
`codex-rs/exec-server` around by moving the `codex-execve-wrapper`
binary and shared shell test fixtures to the crates/tests that now own
that functionality.

## What Changed

### Delete `codex-rs/exec-server`

- Remove the `exec-server` crate, including the MCP server binary,
MCP-specific modules, and its test support/test suite
- Remove `exec-server` from the `codex-rs` workspace and update
`Cargo.lock`

### Move `codex-execve-wrapper` into `codex-rs/shell-escalation`

- Move the wrapper implementation into `shell-escalation`
(`src/unix/execve_wrapper.rs`)
- Add the `codex-execve-wrapper` binary entrypoint under
`shell-escalation/src/bin/`
- Update `shell-escalation` exports/module layout so the wrapper
entrypoint is hosted there
- Move the wrapper README content from `exec-server` to
`shell-escalation/README.md`

### Move shared shell test fixtures to `app-server`

- Move the DotSlash `bash`/`zsh` test fixtures from
`exec-server/tests/suite/` to `app-server/tests/suite/`
- Update `app-server` zsh-fork tests to reference the new fixture paths

### Keep `shell-tool-mcp` as a shell-assets package

- Update `.github/workflows/shell-tool-mcp.yml` packaging so the npm
artifact contains only patched Bash/Zsh payloads (no Rust binaries)
- Update `shell-tool-mcp/package.json`, `shell-tool-mcp/src/index.ts`,
and docs to reflect the shell-assets-only package shape
- `shell-tool-mcp-ci.yml` does not need changes because it is already
JS-only

## Verification

- `cargo shear`
- `cargo clippy -p codex-shell-escalation --tests`
- `just clippy`
This commit is contained in:
Michael Bolin
2026-02-23 20:10:22 -08:00
committed by GitHub
parent 5a3bdcb27b
commit 38f84b6b29
32 changed files with 163 additions and 1699 deletions

View File

@@ -1,8 +0,0 @@
/// Fallback entry point for non-UNIX targets: prints an explanatory message to
/// stderr and terminates the process with exit status 1.
#[cfg(not(unix))]
fn main() {
    use std::process::exit;

    eprintln!("codex-execve-wrapper is only implemented for UNIX");
    exit(1);
}
#[cfg(unix)]
pub use codex_exec_server::main_execve_wrapper as main;

View File

@@ -1,8 +0,0 @@
/// Fallback entry point for non-UNIX targets: prints an explanatory message to
/// stderr and terminates the process with exit status 1.
#[cfg(not(unix))]
fn main() {
    use std::process::exit;

    eprintln!("codex-exec-mcp-server is only implemented for UNIX");
    exit(1);
}
#[cfg(unix)]
pub use codex_exec_server::main_mcp_server as main;

View File

@@ -1,5 +0,0 @@
#[cfg(unix)]
mod unix;
#[cfg(unix)]
pub use unix::*;

View File

@@ -1,301 +0,0 @@
//! This is an MCP server that implements an alternative `shell` tool with fine-grained privilege
//! escalation based on a per-exec() policy.
//!
//! We spawn a Bash process inside a sandbox. The Bash we spawn is patched to allow us to intercept
//! every exec() call it makes by invoking a wrapper program and passing in the arguments it would
//! have passed to exec(). The Bash process (and its descendants) inherit a communication socket
//! from us, and we give its fd number in the CODEX_ESCALATE_SOCKET environment variable.
//!
//! When we intercept an exec() call, we send a message over the socket back to the main
//! MCP process. The MCP process can then decide whether to allow the exec() call to proceed
//! or to escalate privileges and run the requested command with elevated permissions. In the
//! latter case, we send a message back to the child requesting that it forward its open FDs to us.
//! We then execute the requested command on its behalf, patching in the forwarded FDs.
//!
//!
//! ### The privilege escalation flow
//!
//! ```text
//! Child  MCP   Bash   Escalate Helper
//!         |
//!         o----->o
//!         |      |
//!         |      o--(exec)-->o
//!         |      |           |
//!         |o<-(EscalateReq)--o
//!         ||     |           |
//!         |o--(Escalate)---->o
//!         ||     |           |
//!         |o<---------(fds)--o
//!         ||     |           |
//!   o<-----o     |           |
//!   |     ||     |           |
//!   x----->o     |           |
//!         ||     |           |
//!         |x--(exit code)--->o
//!         |      |           |
//!         |      o<--(exit)--x
//!         |      |
//!         o<-----x
//! ```
//!
//! ### The non-escalation flow
//!
//! ```text
//! MCP   Bash   Escalate Helper  Child
//!  |
//!  o----->o
//!  |      |
//!  |      o--(exec)-->o
//!  |      |           |
//!  |o<-(EscalateReq)--o
//!  ||     |           |
//!  |o-(Run)---------->o
//!  |      |           |
//!  |      |           x--(exec)-->o
//!  |      |                       |
//!  |      o<--------------(exit)--x
//!  |      |
//!  o<-----x
//! ```
//!
use std::path::Path;
use std::path::PathBuf;
use std::sync::Arc;
use anyhow::Context as _;
use clap::Parser;
use codex_core::config::find_codex_home;
use codex_core::sandboxing::SandboxPermissions;
use codex_execpolicy::Decision;
use codex_execpolicy::Policy;
use codex_execpolicy::RuleMatch;
use codex_shell_command::is_dangerous_command::command_might_be_dangerous;
use codex_shell_escalation as shell_escalation;
use rmcp::ErrorData as McpError;
use tokio::sync::RwLock;
use tracing_subscriber::EnvFilter;
use tracing_subscriber::{self};
use crate::unix::mcp_escalation_policy::ExecPolicyOutcome;
mod mcp;
mod mcp_escalation_policy;
pub use mcp::ExecResult;
/// File name of the execve wrapper binary. When `--execve` is not passed,
/// `main_mcp_server` joins this name onto the directory of the current executable.
/// Note this must match the name of the binary as specified in Cargo.toml.
const CODEX_EXECVE_WRAPPER_EXE_NAME: &str = "codex-execve-wrapper";
// Command-line options accepted by `main_mcp_server`.
//
// NOTE: the `///` comments on the fields double as the `--help` text generated by
// clap, so they are user-facing. A plain `//` comment is used on the struct itself
// to avoid introducing an `about` string.
#[derive(Parser)]
#[clap(version)]
struct McpServerCli {
    /// Executable to delegate execve(2) calls to in Bash.
    #[arg(long = "execve")]
    execve_wrapper: Option<PathBuf>,

    /// Path to Bash that has been patched to support execve() wrapping.
    #[arg(long = "bash")]
    bash_path: Option<PathBuf>,

    // The flag is only consulted when formatting the program name that is matched
    // against the exec policy; it is never used to pick the binary that gets executed.
    /// Preserve program paths when applying execpolicy (e.g., keep /usr/bin/echo instead of echo).
    /// Note: this does not change the actual program being run.
    #[arg(long)]
    preserve_program_paths: bool,
}
#[tokio::main]
pub async fn main_mcp_server() -> anyhow::Result<()> {
tracing_subscriber::fmt()
.with_env_filter(EnvFilter::from_default_env())
.with_writer(std::io::stderr)
.with_ansi(false)
.init();
let cli = McpServerCli::parse();
let execve_wrapper = match cli.execve_wrapper {
Some(path) => path,
None => {
let cwd = std::env::current_exe()?;
cwd.parent()
.map(|p| p.join(CODEX_EXECVE_WRAPPER_EXE_NAME))
.ok_or_else(|| {
anyhow::anyhow!("failed to determine execve wrapper path from current exe")
})?
}
};
let bash_path = match cli.bash_path {
Some(path) => path,
None => mcp::get_bash_path()?,
};
let policy = Arc::new(RwLock::new(load_exec_policy().await?));
tracing::info!("Starting MCP server");
let service = mcp::serve(
bash_path,
execve_wrapper,
policy,
cli.preserve_program_paths,
)
.await
.inspect_err(|e| {
tracing::error!("serving error: {:?}", e);
})?;
service.waiting().await?;
Ok(())
}
// Command-line arguments of the execve wrapper, parsed in `main_execve_wrapper`.
// Plain `//` comments are used on purpose: clap's derive turns `///` doc comments
// into help text, which would change the program's output.
#[derive(Parser)]
pub struct ExecveWrapperCli {
// First positional argument; passed as the first argument to `shell_escalation::run`.
file: String,
// Every remaining argument (`trailing_var_arg`); passed as the second argument to
// `shell_escalation::run`.
#[arg(trailing_var_arg = true)]
argv: Vec<String>,
}
#[tokio::main]
pub async fn main_execve_wrapper() -> anyhow::Result<()> {
tracing_subscriber::fmt()
.with_env_filter(EnvFilter::from_default_env())
.with_writer(std::io::stderr)
.with_ansi(false)
.init();
let ExecveWrapperCli { file, argv } = ExecveWrapperCli::parse();
let exit_code = shell_escalation::run(file, argv).await?;
std::process::exit(exit_code);
}
/// Decide how to handle an exec() call for a specific command.
///
/// `file` is the absolute, canonical path to the executable to run, i.e. the first arg to exec.
/// `argv` is the argv, including the program name (`argv[0]`).
/// `preserve_program_paths` selects whether the policy sees the full `file` path or only its
/// final component as the program name.
///
/// The returned outcome mirrors the policy decision. For `Allow` and `Prompt` it also carries
/// the sandbox permissions: `RequireEscalated` when a non-heuristic rule produced the final
/// decision, `UseDefault` otherwise.
///
/// # Errors
///
/// Returns an internal `McpError` when no program name can be derived from `file`.
pub(crate) fn evaluate_exec_policy(
    policy: &Policy,
    file: &Path,
    argv: &[String],
    preserve_program_paths: bool,
) -> Result<ExecPolicyOutcome, McpError> {
    let Some(program_name) = format_program_name(file, preserve_program_paths) else {
        return Err(McpError::internal_error(
            format!("failed to format program name for `{}`", file.display()),
            None,
        ));
    };

    // Use the normalized program name instead of argv[0].
    let mut command: Vec<String> = vec![program_name];
    command.extend(argv.iter().skip(1).cloned());

    // The closure is the fallback consulted by `Policy::check`: prompt for anything the
    // dangerous-command heuristic flags, allow the rest.
    let evaluation = policy.check(&command, &|cmd| {
        if command_might_be_dangerous(cmd) {
            Decision::Prompt
        } else {
            Decision::Allow
        }
    });

    // decisions driven by policy should run outside sandbox
    let decision_driven_by_policy = evaluation
        .matched_rules
        .iter()
        .filter(|rule_match| !matches!(rule_match, RuleMatch::HeuristicsRuleMatch { .. }))
        .any(|rule_match| rule_match.decision() == evaluation.decision);
    let sandbox_permissions = match decision_driven_by_policy {
        true => SandboxPermissions::RequireEscalated,
        false => SandboxPermissions::UseDefault,
    };

    let outcome = match evaluation.decision {
        Decision::Allow => ExecPolicyOutcome::Allow {
            sandbox_permissions,
        },
        Decision::Prompt => ExecPolicyOutcome::Prompt {
            sandbox_permissions,
        },
        Decision::Forbidden => ExecPolicyOutcome::Forbidden,
    };
    Ok(outcome)
}
/// Returns the program name to match against the exec policy.
///
/// With `preserve_program_paths` the whole `path` is used; otherwise only its final
/// component. Yields `None` when the path has no final component (e.g. `/` or a path
/// ending in `..`) or when the selected name is not valid UTF-8.
fn format_program_name(path: &Path, preserve_program_paths: bool) -> Option<String> {
    let selected = if preserve_program_paths {
        path.as_os_str()
    } else {
        path.file_name()?
    };
    selected.to_str().map(str::to_owned)
}
async fn load_exec_policy() -> anyhow::Result<Policy> {
let codex_home = find_codex_home().context("failed to resolve codex_home for execpolicy")?;
// TODO(mbolin): At a minimum, `cwd` should be configurable via
// `codex/sandbox-state/update` or some other custom MCP call.
let cwd = None;
let cli_overrides = Vec::new();
let overrides = codex_core::config_loader::LoaderOverrides::default();
let config_layer_stack = codex_core::config_loader::load_config_layers_state(
&codex_home,
cwd,
&cli_overrides,
overrides,
codex_core::config_loader::CloudRequirementsLoader::default(),
)
.await?;
codex_core::load_exec_policy(&config_layer_stack)
.await
.map_err(anyhow::Error::from)
}
#[cfg(test)]
mod tests {
    use super::*;
    use codex_core::sandboxing::SandboxPermissions;
    use codex_execpolicy::Decision;
    use codex_execpolicy::Policy;
    use pretty_assertions::assert_eq;
    use std::path::Path;

    /// Converts string literals into the owned argv shape the policy API expects.
    fn owned_args(parts: &[&str]) -> Vec<String> {
        parts.iter().map(|part| (*part).to_string()).collect()
    }

    /// With an empty policy, `rm -rf /` must produce a prompt that keeps the default
    /// sandbox permissions.
    #[test]
    fn evaluate_exec_policy_uses_heuristics_for_dangerous_commands() {
        let argv = owned_args(&["rm", "-rf", "/"]);

        let outcome = evaluate_exec_policy(&Policy::empty(), Path::new("/bin/rm"), &argv, false)
            .expect("policy evaluation");

        let expected = ExecPolicyOutcome::Prompt {
            sandbox_permissions: SandboxPermissions::UseDefault,
        };
        assert_eq!(outcome, expected);
    }

    /// A prefix rule written with the full program path must match when program paths are
    /// preserved, and the rule-driven decision must require escalated permissions.
    #[test]
    fn evaluate_exec_policy_respects_preserve_program_paths() {
        const PROGRAM: &str = "/usr/local/bin/custom-cmd";

        let mut policy = Policy::empty();
        let rule_prefix = owned_args(&[PROGRAM, "--flag"]);
        policy
            .add_prefix_rule(&rule_prefix[..], Decision::Allow)
            .expect("policy rule should be added");

        let argv = owned_args(&[PROGRAM, "--flag", "value"]);
        let outcome = evaluate_exec_policy(&policy, Path::new(PROGRAM), &argv, true)
            .expect("policy evaluation");

        let expected = ExecPolicyOutcome::Allow {
            sandbox_permissions: SandboxPermissions::RequireEscalated,
        };
        assert_eq!(outcome, expected);
    }
}

View File

@@ -1,307 +0,0 @@
use std::path::PathBuf;
use std::sync::Arc;
use std::time::Duration;
use anyhow::Context as _;
use anyhow::Result;
use codex_core::MCP_SANDBOX_STATE_CAPABILITY;
use codex_core::MCP_SANDBOX_STATE_METHOD;
use codex_core::SandboxState;
use codex_execpolicy::Policy;
use codex_protocol::protocol::SandboxPolicy;
use codex_shell_escalation::EscalationPolicyFactory;
use codex_shell_escalation::run_escalate_server;
use rmcp::ErrorData as McpError;
use rmcp::RoleServer;
use rmcp::ServerHandler;
use rmcp::ServiceExt;
use rmcp::handler::server::router::tool::ToolRouter;
use rmcp::handler::server::wrapper::Parameters;
use rmcp::model::CustomRequest;
use rmcp::model::CustomResult;
use rmcp::model::*;
use rmcp::service::RequestContext;
use rmcp::service::RunningService;
use rmcp::tool;
use rmcp::tool_handler;
use rmcp::tool_router;
use rmcp::transport::stdio;
use serde_json::json;
use tokio::sync::RwLock;
use crate::unix::mcp_escalation_policy::McpEscalationPolicy;
/// Name of the environment variable that holds the path to our patched bash;
/// read by `get_bash_path`.
const CODEX_BASH_PATH_ENV_VAR: &str = "CODEX_BASH_PATH";
/// Value stored under the `"version"` key of the sandbox-state experimental
/// capability that `get_info` advertises.
const SANDBOX_STATE_CAPABILITY_VERSION: &str = "1.0.0";
pub(crate) fn get_bash_path() -> Result<PathBuf> {
std::env::var(CODEX_BASH_PATH_ENV_VAR)
.map(PathBuf::from)
.context(format!("{CODEX_BASH_PATH_ENV_VAR} must be set"))
}
/// Serializable result of the `shell` tool; returned to the MCP client as JSON content.
/// Each field is copied one-to-one from `codex_shell_escalation::ExecResult`.
#[derive(Debug, serde::Serialize, serde::Deserialize)]
pub struct ExecResult {
/// Exit code of the command (as reported by `codex_shell_escalation`).
pub exit_code: i32,
/// Output of the command. NOTE(review): presumably combined stdout/stderr — confirm
/// against `codex_shell_escalation::ExecResult`.
pub output: String,
/// Duration reported by `codex_shell_escalation` for this execution.
pub duration: Duration,
/// Timeout flag reported by `codex_shell_escalation`; presumably set when the effective
/// timeout elapsed — confirm.
pub timed_out: bool,
}
/// Field-by-field conversion from the escalation crate's result into the MCP-facing
/// `ExecResult`.
impl From<codex_shell_escalation::ExecResult> for ExecResult {
    fn from(result: codex_shell_escalation::ExecResult) -> Self {
        let codex_shell_escalation::ExecResult {
            exit_code,
            output,
            duration,
            timed_out,
            ..
        } = result;

        Self {
            exit_code,
            output,
            duration,
            timed_out,
        }
    }
}
/// MCP server handler that exposes the `shell` tool.
#[derive(Clone)]
pub struct ExecTool {
/// Router produced by `Self::tool_router()` in `new`.
tool_router: ToolRouter<ExecTool>,
/// Path to the Bash binary handed to `run_escalate_server`.
bash_path: PathBuf,
/// Path to the execve wrapper handed to `run_escalate_server`.
execve_wrapper: PathBuf,
/// Shared exec policy handed to `run_escalate_server` on every `shell` call.
policy: Arc<RwLock<Policy>>,
/// Forwarded to the escalation policy factory; see `evaluate_exec_policy`.
preserve_program_paths: bool,
/// Sandbox state last received through the custom sandbox-state request. `None` until
/// the first such request, in which case `shell` falls back to a read-only policy.
sandbox_state: Arc<RwLock<Option<SandboxState>>>,
}
// Parameters of the `shell` tool. The `///` field comments below become the field
// descriptions in the generated JSON schema (pinned by the schema test in this file), so
// this struct-level note is a plain `//` comment to keep the schema unchanged.
#[derive(Debug, serde::Serialize, serde::Deserialize, rmcp::schemars::JsonSchema)]
pub struct ExecParams {
/// The bash string to execute.
pub command: String,
/// The working directory to execute the command in. Must be an absolute path.
pub workdir: String,
/// The timeout for the command in milliseconds.
pub timeout_ms: Option<u64>,
/// Launch Bash with -lc instead of -c: defaults to true.
pub login: Option<bool>,
}
impl From<ExecParams> for codex_shell_escalation::ExecParams {
fn from(inner: ExecParams) -> Self {
Self {
command: inner.command,
workdir: inner.workdir,
timeout_ms: inner.timeout_ms,
login: inner.login,
}
}
}
/// Factory handed to `run_escalate_server` by the `shell` tool; it builds the
/// `McpEscalationPolicy` used for that tool call.
struct McpEscalationPolicyFactory {
/// Request context of the `shell` call; cloned into each policy that is created.
context: RequestContext<RoleServer>,
/// Copied into each policy that is created.
preserve_program_paths: bool,
}
impl EscalationPolicyFactory for McpEscalationPolicyFactory {
    type Policy = McpEscalationPolicy;

    /// Builds an `McpEscalationPolicy` from the given policy and stopwatch plus a clone of
    /// this factory's request context and its `preserve_program_paths` setting.
    fn create_policy(
        &self,
        policy: Arc<RwLock<Policy>>,
        stopwatch: codex_shell_escalation::Stopwatch,
    ) -> Self::Policy {
        let Self {
            context,
            preserve_program_paths,
        } = self;

        McpEscalationPolicy::new(policy, context.clone(), stopwatch, *preserve_program_paths)
    }
}
#[tool_router]
impl ExecTool {
    // Creates the tool with no sandbox state recorded yet; the state is filled in later by
    // the custom sandbox-state request handler.
    pub fn new(
        bash_path: PathBuf,
        execve_wrapper: PathBuf,
        policy: Arc<RwLock<Policy>>,
        preserve_program_paths: bool,
    ) -> Self {
        let sandbox_state = Arc::new(RwLock::new(None));
        let tool_router = Self::tool_router();
        Self {
            tool_router,
            bash_path,
            execve_wrapper,
            policy,
            preserve_program_paths,
            sandbox_state,
        }
    }

    /// Runs a shell command and returns its output. You MUST provide the workdir as an absolute path.
    #[tool]
    async fn shell(
        &self,
        context: RequestContext<RoleServer>,
        Parameters(params): Parameters<ExecParams>,
    ) -> Result<CallToolResult, McpError> {
        let timeout_ms = params
            .timeout_ms
            .unwrap_or(codex_core::exec::DEFAULT_EXEC_COMMAND_TIMEOUT_MS);
        let effective_timeout = Duration::from_millis(timeout_ms);

        // Snapshot the last sandbox state the client sent. If none was sent yet, fall back
        // to a read-only policy rooted at the requested working directory.
        let recorded_state = self.sandbox_state.read().await.clone();
        let sandbox_state = match recorded_state {
            Some(state) => state,
            None => SandboxState {
                sandbox_policy: SandboxPolicy::new_read_only_policy(),
                codex_linux_sandbox_exe: None,
                sandbox_cwd: PathBuf::from(&params.workdir),
                use_linux_sandbox_bwrap: false,
            },
        };

        let policy_factory = McpEscalationPolicyFactory {
            context,
            preserve_program_paths: self.preserve_program_paths,
        };

        let run_outcome = run_escalate_server(
            params.into(),
            &sandbox_state,
            &self.bash_path,
            &self.execve_wrapper,
            self.policy.clone(),
            policy_factory,
            effective_timeout,
        )
        .await;
        let result = run_outcome.map_err(|e| McpError::internal_error(e.to_string(), None))?;

        let content = Content::json(ExecResult::from(result))?;
        Ok(CallToolResult::success(vec![content]))
    }
}
/// Marker type naming the sandbox-state custom method; used as the type parameter of
/// `McpError::method_not_found` when an unknown custom request arrives.
#[derive(Default)]
pub struct CodexSandboxStateUpdateMethod;
impl rmcp::model::ConstString for CodexSandboxStateUpdateMethod {
// The method name is the shared `MCP_SANDBOX_STATE_METHOD` constant from `codex_core`.
const VALUE: &'static str = MCP_SANDBOX_STATE_METHOD;
}
#[tool_handler]
impl ServerHandler for ExecTool {
    /// Describes this server: tools enabled, plus an experimental capability keyed by
    /// `MCP_SANDBOX_STATE_CAPABILITY` that carries the sandbox-state capability version.
    fn get_info(&self) -> ServerInfo {
        let mut sandbox_state_capability = JsonObject::new();
        sandbox_state_capability.insert(
            "version".to_string(),
            serde_json::Value::String(SANDBOX_STATE_CAPABILITY_VERSION.to_string()),
        );

        let mut experimental_capabilities = ExperimentalCapabilities::new();
        experimental_capabilities.insert(
            MCP_SANDBOX_STATE_CAPABILITY.to_string(),
            sandbox_state_capability,
        );

        let capabilities = ServerCapabilities::builder()
            .enable_tools()
            .enable_experimental_with(experimental_capabilities)
            .build();
        let instructions =
            "This server provides a tool to execute shell commands and return their output."
                .to_string();

        ServerInfo {
            protocol_version: ProtocolVersion::V_2025_06_18,
            capabilities,
            server_info: Implementation::from_build_env(),
            instructions: Some(instructions),
        }
    }

    /// Answers every initialize request with `get_info()`, ignoring the request contents.
    async fn initialize(
        &self,
        _request: InitializeRequestParams,
        _context: RequestContext<RoleServer>,
    ) -> Result<InitializeResult, McpError> {
        let info = self.get_info();
        Ok(info)
    }

    /// Handles the sandbox-state custom request by storing the deserialized `SandboxState`
    /// for later `shell` calls and replying with an empty JSON object.
    ///
    /// Any other method is rejected as not found; missing or undeserializable params are
    /// rejected as invalid params.
    async fn on_custom_request(
        &self,
        request: CustomRequest,
        _context: rmcp::service::RequestContext<rmcp::RoleServer>,
    ) -> Result<CustomResult, McpError> {
        let CustomRequest { method, params, .. } = request;
        if method != MCP_SANDBOX_STATE_METHOD {
            return Err(McpError::method_not_found::<CodexSandboxStateUpdateMethod>());
        }

        let params = match params {
            Some(value) => value,
            None => {
                return Err(McpError::invalid_params(
                    "missing params for sandbox state request".to_string(),
                    None,
                ));
            }
        };

        // Deserialize from a clone so the original payload can be echoed back on failure.
        let sandbox_state = match serde_json::from_value::<SandboxState>(params.clone()) {
            Ok(state) => state,
            Err(_) => {
                return Err(McpError::invalid_params(
                    "failed to deserialize sandbox state".to_string(),
                    Some(params),
                ));
            }
        };

        {
            let mut slot = self.sandbox_state.write().await;
            *slot = Some(sandbox_state);
        }
        Ok(CustomResult::new(json!({})))
    }
}
/// Builds an `ExecTool` from the given settings and starts serving it over stdio.
///
/// # Errors
///
/// Returns the `ServerInitializeError` produced when the service fails to start.
pub(crate) async fn serve(
    bash_path: PathBuf,
    execve_wrapper: PathBuf,
    policy: Arc<RwLock<Policy>>,
    preserve_program_paths: bool,
) -> Result<RunningService<RoleServer, ExecTool>, rmcp::service::ServerInitializeError> {
    ExecTool::new(bash_path, execve_wrapper, policy, preserve_program_paths)
        .serve(stdio())
        .await
}
// Unit tests for the MCP-facing types in this module.
#[cfg(test)]
mod tests {
use super::*;
use pretty_assertions::assert_eq;
use serde_json::json;
/// Verify that the way we use serde does not compromise the desired JSON
/// schema via schemars. In particular, ensure that the `login` and
/// `timeout_ms` fields are optional.
#[test]
fn exec_params_json_schema_matches_expected() {
// Generate the schema exactly as it is derived for `ExecParams`.
let schema = rmcp::schemars::schema_for!(ExecParams);
let actual = serde_json::to_value(schema).expect("schema should serialize");
// Only `command` and `workdir` may appear under "required"; the two `Option`
// fields must be nullable instead.
assert_eq!(
actual,
json!({
"$schema": "https://json-schema.org/draft/2020-12/schema",
"title": "ExecParams",
"type": "object",
"properties": {
"command": {
"description": "The bash string to execute.",
"type": "string"
},
"login": {
"description": "Launch Bash with -lc instead of -c: defaults to true.",
"type": ["boolean", "null"]
},
"timeout_ms": {
"description": "The timeout for the command in milliseconds.",
"format": "uint64",
"minimum": 0,
"type": ["integer", "null"]
},
"workdir": {
"description":
"The working directory to execute the command in. Must be an absolute path.",
"type": "string"
}
},
"required": ["command", "workdir"]
})
);
}
}

View File

@@ -1,150 +0,0 @@
use std::path::Path;
use codex_core::sandboxing::SandboxPermissions;
use codex_execpolicy::Policy;
use codex_shell_escalation::EscalateAction;
use codex_shell_escalation::EscalationPolicy;
use codex_shell_escalation::Stopwatch;
use rmcp::ErrorData as McpError;
use rmcp::RoleServer;
use rmcp::model::CreateElicitationRequestParams;
use rmcp::model::CreateElicitationResult;
use rmcp::model::ElicitationAction;
use rmcp::model::ElicitationSchema;
use rmcp::service::RequestContext;
use shlex::try_join;
use std::sync::Arc;
use tokio::sync::RwLock;
/// Result of evaluating the exec policy for one exec() call, as produced by
/// `evaluate_exec_policy` and consumed by `McpEscalationPolicy::determine_action`.
#[derive(Debug, PartialEq, Eq)]
pub(crate) enum ExecPolicyOutcome {
/// The command may run without asking the user.
Allow {
/// Whether the command needs escalated permissions or runs with the default sandbox.
sandbox_permissions: SandboxPermissions,
},
/// The user must be asked before the command runs.
Prompt {
/// Permissions to apply if the user accepts.
sandbox_permissions: SandboxPermissions,
},
/// The command must not run.
Forbidden,
}
/// ExecPolicy with access to the MCP RequestContext so that it can leverage
/// elicitations.
pub(crate) struct McpEscalationPolicy {
/// In-memory execpolicy rules that drive how to handle an exec() call.
policy: Arc<RwLock<Policy>>,
/// Request context whose peer is used to send elicitation requests to the client.
context: RequestContext<RoleServer>,
/// Stopwatch that is paused (`pause_for`) while an elicitation is awaiting the user.
stopwatch: Stopwatch,
/// Passed through to `evaluate_exec_policy` when matching the program name.
preserve_program_paths: bool,
}
impl McpEscalationPolicy {
    /// Creates a policy bound to one MCP request.
    pub(crate) fn new(
        policy: Arc<RwLock<Policy>>,
        context: RequestContext<RoleServer>,
        stopwatch: Stopwatch,
        preserve_program_paths: bool,
    ) -> Self {
        Self {
            policy,
            context,
            stopwatch,
            preserve_program_paths,
        }
    }

    /// Renders `file` followed by the shell-quoted arguments (`argv` without `argv[0]`) for
    /// display in the approval prompt.
    ///
    /// If shell quoting fails, the arguments are shown debug-escaped instead of being dropped,
    /// so the user is never asked to approve a command whose arguments are hidden.
    fn display_command(file: &Path, argv: &[String]) -> String {
        let args = try_join(argv.iter().skip(1).map(String::as_str)).unwrap_or_else(|_| {
            argv.iter()
                .skip(1)
                .map(|arg| format!("{arg:?}"))
                .collect::<Vec<_>>()
                .join(" ")
        });
        if args.is_empty() {
            file.display().to_string()
        } else {
            format!("{} {}", file.display(), args)
        }
    }

    /// Asks the MCP client, via a form elicitation, whether the command may run in `workdir`.
    ///
    /// The stopwatch is paused for as long as the elicitation is pending.
    ///
    /// # Errors
    ///
    /// Returns an internal `McpError` when the elicitation schema cannot be built or when the
    /// elicitation request itself fails.
    async fn prompt(
        &self,
        file: &Path,
        argv: &[String],
        workdir: &Path,
        context: RequestContext<RoleServer>,
    ) -> Result<CreateElicitationResult, McpError> {
        let command = Self::display_command(file, argv);
        self.stopwatch
            .pause_for(async {
                context
                    .peer
                    .create_elicitation(CreateElicitationRequestParams::FormElicitationParams {
                        meta: None,
                        message: format!(
                            "Allow agent to run `{command}` in `{}`?",
                            workdir.display()
                        ),
                        requested_schema: ElicitationSchema::builder()
                            .title("Execution Permission Request")
                            .optional_string_with("reason", |schema| {
                                schema.description(
                                    "Optional reason for allowing or denying execution",
                                )
                            })
                            .build()
                            .map_err(|e| {
                                McpError::internal_error(
                                    format!("failed to build elicitation schema: {e}"),
                                    None,
                                )
                            })?,
                    })
                    .await
                    .map_err(|e| McpError::internal_error(e.to_string(), None))
            })
            .await
    }
}
#[async_trait::async_trait]
impl EscalationPolicy for McpEscalationPolicy {
async fn determine_action(
&self,
file: &Path,
argv: &[String],
workdir: &Path,
) -> anyhow::Result<EscalateAction> {
let policy = self.policy.read().await;
let outcome =
crate::unix::evaluate_exec_policy(&policy, file, argv, self.preserve_program_paths)?;
let action = match outcome {
ExecPolicyOutcome::Allow {
sandbox_permissions,
} => {
if sandbox_permissions.requires_escalated_permissions() {
EscalateAction::Escalate
} else {
EscalateAction::Run
}
}
ExecPolicyOutcome::Prompt {
sandbox_permissions,
} => {
let result = self
.prompt(file, argv, workdir, self.context.clone())
.await?;
// TODO: Extract reason from `result.content`.
match result.action {
ElicitationAction::Accept => {
if sandbox_permissions.requires_escalated_permissions() {
EscalateAction::Escalate
} else {
EscalateAction::Run
}
}
ElicitationAction::Decline => EscalateAction::Deny {
reason: Some("User declined execution".to_string()),
},
ElicitationAction::Cancel => EscalateAction::Deny {
reason: Some("User cancelled execution".to_string()),
},
}
}
ExecPolicyOutcome::Forbidden => EscalateAction::Deny {
reason: Some("Execution forbidden by policy".to_string()),
},
};
Ok(action)
}
}