mirror of
https://github.com/openai/codex.git
synced 2026-03-05 13:35:28 +03:00
refactor: delete exec-server and move execve wrapper into shell-escalation (#12632)
## Why We already plan to remove the shell-tool MCP path, and doing that cleanup first makes the follow-on `shell-escalation` work much simpler. This change removes the last remaining reason to keep `codex-rs/exec-server` around by moving the `codex-execve-wrapper` binary and shared shell test fixtures to the crates/tests that now own that functionality. ## What Changed ### Delete `codex-rs/exec-server` - Remove the `exec-server` crate, including the MCP server binary, MCP-specific modules, and its test support/test suite - Remove `exec-server` from the `codex-rs` workspace and update `Cargo.lock` ### Move `codex-execve-wrapper` into `codex-rs/shell-escalation` - Move the wrapper implementation into `shell-escalation` (`src/unix/execve_wrapper.rs`) - Add the `codex-execve-wrapper` binary entrypoint under `shell-escalation/src/bin/` - Update `shell-escalation` exports/module layout so the wrapper entrypoint is hosted there - Move the wrapper README content from `exec-server` to `shell-escalation/README.md` ### Move shared shell test fixtures to `app-server` - Move the DotSlash `bash`/`zsh` test fixtures from `exec-server/tests/suite/` to `app-server/tests/suite/` - Update `app-server` zsh-fork tests to reference the new fixture paths ### Keep `shell-tool-mcp` as a shell-assets package - Update `.github/workflows/shell-tool-mcp.yml` packaging so the npm artifact contains only patched Bash/Zsh payloads (no Rust binaries) - Update `shell-tool-mcp/package.json`, `shell-tool-mcp/src/index.ts`, and docs to reflect the shell-assets-only package shape - `shell-tool-mcp-ci.yml` does not need changes because it is already JS-only ## Verification - `cargo shear` - `cargo clippy -p codex-shell-escalation --tests` - `just clippy`
This commit is contained in:
126
.github/workflows/shell-tool-mcp.yml
vendored
126
.github/workflows/shell-tool-mcp.yml
vendored
@@ -67,128 +67,6 @@ jobs:
|
||||
echo "npm_tag=${npm_tag}" >> "$GITHUB_OUTPUT"
|
||||
echo "should_publish=${should_publish}" >> "$GITHUB_OUTPUT"
|
||||
|
||||
rust-binaries:
|
||||
name: Build Rust - ${{ matrix.target }}
|
||||
needs: metadata
|
||||
runs-on: ${{ matrix.runner }}
|
||||
timeout-minutes: 30
|
||||
env:
|
||||
CARGO_PROFILE_RELEASE_LTO: ${{ contains(needs.metadata.outputs.version, '-alpha') && 'thin' || 'fat' }}
|
||||
defaults:
|
||||
run:
|
||||
working-directory: codex-rs
|
||||
strategy:
|
||||
fail-fast: false
|
||||
matrix:
|
||||
include:
|
||||
- runner: macos-15-xlarge
|
||||
target: aarch64-apple-darwin
|
||||
- runner: macos-15-xlarge
|
||||
target: x86_64-apple-darwin
|
||||
- runner: ubuntu-24.04
|
||||
target: x86_64-unknown-linux-musl
|
||||
install_musl: true
|
||||
- runner: ubuntu-24.04-arm
|
||||
target: aarch64-unknown-linux-musl
|
||||
install_musl: true
|
||||
steps:
|
||||
- name: Checkout repository
|
||||
uses: actions/checkout@v6
|
||||
|
||||
- name: Install UBSan runtime (musl)
|
||||
if: ${{ matrix.install_musl }}
|
||||
shell: bash
|
||||
run: |
|
||||
set -euo pipefail
|
||||
if command -v apt-get >/dev/null 2>&1; then
|
||||
sudo apt-get update -y
|
||||
sudo DEBIAN_FRONTEND=noninteractive apt-get install -y libubsan1
|
||||
fi
|
||||
|
||||
- uses: dtolnay/rust-toolchain@1.93.0
|
||||
with:
|
||||
targets: ${{ matrix.target }}
|
||||
|
||||
- if: ${{ matrix.install_musl }}
|
||||
name: Install Zig
|
||||
uses: mlugg/setup-zig@v2
|
||||
with:
|
||||
version: 0.14.0
|
||||
|
||||
- if: ${{ matrix.install_musl }}
|
||||
name: Install musl build dependencies
|
||||
env:
|
||||
TARGET: ${{ matrix.target }}
|
||||
run: bash "${GITHUB_WORKSPACE}/.github/scripts/install-musl-build-tools.sh"
|
||||
|
||||
- if: ${{ matrix.install_musl }}
|
||||
name: Configure rustc UBSan wrapper (musl host)
|
||||
shell: bash
|
||||
run: |
|
||||
set -euo pipefail
|
||||
ubsan=""
|
||||
if command -v ldconfig >/dev/null 2>&1; then
|
||||
ubsan="$(ldconfig -p | grep -m1 'libubsan\.so\.1' | sed -E 's/.*=> (.*)$/\1/')"
|
||||
fi
|
||||
wrapper_root="${RUNNER_TEMP:-/tmp}"
|
||||
wrapper="${wrapper_root}/rustc-ubsan-wrapper"
|
||||
cat > "${wrapper}" <<EOF
|
||||
#!/usr/bin/env bash
|
||||
set -euo pipefail
|
||||
if [[ -n "${ubsan}" ]]; then
|
||||
export LD_PRELOAD="${ubsan}\${LD_PRELOAD:+:\${LD_PRELOAD}}"
|
||||
fi
|
||||
exec "\$1" "\${@:2}"
|
||||
EOF
|
||||
chmod +x "${wrapper}"
|
||||
echo "RUSTC_WRAPPER=${wrapper}" >> "$GITHUB_ENV"
|
||||
echo "RUSTC_WORKSPACE_WRAPPER=" >> "$GITHUB_ENV"
|
||||
|
||||
- if: ${{ matrix.install_musl }}
|
||||
name: Clear sanitizer flags (musl)
|
||||
shell: bash
|
||||
run: |
|
||||
set -euo pipefail
|
||||
# Clear global Rust flags so host/proc-macro builds don't pull in UBSan.
|
||||
echo "RUSTFLAGS=" >> "$GITHUB_ENV"
|
||||
echo "CARGO_ENCODED_RUSTFLAGS=" >> "$GITHUB_ENV"
|
||||
echo "RUSTDOCFLAGS=" >> "$GITHUB_ENV"
|
||||
# Override any runner-level Cargo config rustflags as well.
|
||||
echo "CARGO_BUILD_RUSTFLAGS=" >> "$GITHUB_ENV"
|
||||
echo "CARGO_TARGET_X86_64_UNKNOWN_LINUX_GNU_RUSTFLAGS=" >> "$GITHUB_ENV"
|
||||
echo "CARGO_TARGET_AARCH64_UNKNOWN_LINUX_GNU_RUSTFLAGS=" >> "$GITHUB_ENV"
|
||||
echo "CARGO_TARGET_X86_64_UNKNOWN_LINUX_MUSL_RUSTFLAGS=" >> "$GITHUB_ENV"
|
||||
echo "CARGO_TARGET_AARCH64_UNKNOWN_LINUX_MUSL_RUSTFLAGS=" >> "$GITHUB_ENV"
|
||||
|
||||
sanitize_flags() {
|
||||
local input="$1"
|
||||
input="${input//-fsanitize=undefined/}"
|
||||
input="${input//-fno-sanitize-recover=undefined/}"
|
||||
input="${input//-fno-sanitize-trap=undefined/}"
|
||||
echo "$input"
|
||||
}
|
||||
|
||||
cflags="$(sanitize_flags "${CFLAGS-}")"
|
||||
cxxflags="$(sanitize_flags "${CXXFLAGS-}")"
|
||||
echo "CFLAGS=${cflags}" >> "$GITHUB_ENV"
|
||||
echo "CXXFLAGS=${cxxflags}" >> "$GITHUB_ENV"
|
||||
|
||||
- name: Build exec server binaries
|
||||
run: cargo build --release --target ${{ matrix.target }} --bin codex-exec-mcp-server --bin codex-execve-wrapper
|
||||
|
||||
- name: Stage exec server binaries
|
||||
run: |
|
||||
dest="${GITHUB_WORKSPACE}/artifacts/vendor/${{ matrix.target }}"
|
||||
mkdir -p "$dest"
|
||||
cp "target/${{ matrix.target }}/release/codex-exec-mcp-server" "$dest/"
|
||||
cp "target/${{ matrix.target }}/release/codex-execve-wrapper" "$dest/"
|
||||
|
||||
- uses: actions/upload-artifact@v6
|
||||
with:
|
||||
name: shell-tool-mcp-rust-${{ matrix.target }}
|
||||
path: artifacts/**
|
||||
if-no-files-found: error
|
||||
|
||||
bash-linux:
|
||||
name: Build Bash (Linux) - ${{ matrix.variant }} - ${{ matrix.target }}
|
||||
needs: metadata
|
||||
@@ -537,7 +415,6 @@ jobs:
|
||||
name: Package npm module
|
||||
needs:
|
||||
- metadata
|
||||
- rust-binaries
|
||||
- bash-linux
|
||||
- bash-darwin
|
||||
- zsh-linux
|
||||
@@ -579,7 +456,6 @@ jobs:
|
||||
mkdir -p "$staging" "$staging/vendor"
|
||||
cp shell-tool-mcp/README.md "$staging/"
|
||||
cp shell-tool-mcp/package.json "$staging/"
|
||||
cp -R shell-tool-mcp/bin "$staging/"
|
||||
|
||||
found_vendor="false"
|
||||
shopt -s nullglob
|
||||
@@ -613,8 +489,6 @@ jobs:
|
||||
set -euo pipefail
|
||||
staging="${{ steps.staging.outputs.dir }}"
|
||||
chmod +x \
|
||||
"$staging"/vendor/*/codex-exec-mcp-server \
|
||||
"$staging"/vendor/*/codex-execve-wrapper \
|
||||
"$staging"/vendor/*/bash/*/bash \
|
||||
"$staging"/vendor/*/zsh/*/zsh
|
||||
|
||||
|
||||
43
codex-rs/Cargo.lock
generated
43
codex-rs/Cargo.lock
generated
@@ -1835,34 +1835,6 @@ dependencies = [
|
||||
"wiremock",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "codex-exec-server"
|
||||
version = "0.0.0"
|
||||
dependencies = [
|
||||
"anyhow",
|
||||
"async-trait",
|
||||
"clap",
|
||||
"codex-core",
|
||||
"codex-execpolicy",
|
||||
"codex-protocol",
|
||||
"codex-shell-command",
|
||||
"codex-shell-escalation",
|
||||
"codex-utils-cargo-bin",
|
||||
"core_test_support",
|
||||
"exec_server_test_support",
|
||||
"maplit",
|
||||
"pretty_assertions",
|
||||
"rmcp",
|
||||
"schemars 1.2.1",
|
||||
"serde",
|
||||
"serde_json",
|
||||
"shlex",
|
||||
"tempfile",
|
||||
"tokio",
|
||||
"tracing",
|
||||
"tracing-subscriber",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "codex-execpolicy"
|
||||
version = "0.0.0"
|
||||
@@ -2265,6 +2237,7 @@ version = "0.0.0"
|
||||
dependencies = [
|
||||
"anyhow",
|
||||
"async-trait",
|
||||
"clap",
|
||||
"codex-core",
|
||||
"codex-execpolicy",
|
||||
"codex-protocol",
|
||||
@@ -2278,6 +2251,7 @@ dependencies = [
|
||||
"tokio",
|
||||
"tokio-util",
|
||||
"tracing",
|
||||
"tracing-subscriber",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
@@ -3693,19 +3667,6 @@ dependencies = [
|
||||
"pin-project-lite",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "exec_server_test_support"
|
||||
version = "0.0.0"
|
||||
dependencies = [
|
||||
"anyhow",
|
||||
"codex-core",
|
||||
"codex-protocol",
|
||||
"codex-utils-cargo-bin",
|
||||
"rmcp",
|
||||
"serde_json",
|
||||
"tokio",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "eyre"
|
||||
version = "0.6.12"
|
||||
|
||||
@@ -23,7 +23,6 @@ members = [
|
||||
"hooks",
|
||||
"secrets",
|
||||
"exec",
|
||||
"exec-server",
|
||||
"execpolicy",
|
||||
"execpolicy-legacy",
|
||||
"keyring-store",
|
||||
@@ -114,7 +113,6 @@ codex-responses-api-proxy = { path = "responses-api-proxy" }
|
||||
codex-rmcp-client = { path = "rmcp-client" }
|
||||
codex-secrets = { path = "secrets" }
|
||||
codex-shell-command = { path = "shell-command" }
|
||||
codex-shell-escalation = { path = "shell-escalation" }
|
||||
codex-skills = { path = "skills" }
|
||||
codex-state = { path = "state" }
|
||||
codex-stdio-to-uds = { path = "stdio-to-uds" }
|
||||
@@ -138,7 +136,6 @@ codex-utils-sleep-inhibitor = { path = "utils/sleep-inhibitor" }
|
||||
codex-utils-string = { path = "utils/string" }
|
||||
codex-windows-sandbox = { path = "windows-sandbox-rs" }
|
||||
core_test_support = { path = "core/tests/common" }
|
||||
exec_server_test_support = { path = "exec-server/tests/common" }
|
||||
mcp_test_support = { path = "mcp-server/tests/common" }
|
||||
|
||||
# External
|
||||
|
||||
@@ -3,7 +3,7 @@
|
||||
// This is an instance of the fork of Bash that we bundle with
|
||||
// https://www.npmjs.com/package/@openai/codex-shell-tool-mcp.
|
||||
// Fetching the prebuilt version via DotSlash makes it easier to write
|
||||
// integration tests for the MCP server.
|
||||
// integration tests for shell execution flows.
|
||||
//
|
||||
// TODO(mbolin): Currently, we use a .tgz artifact that includes binaries for
|
||||
// multiple platforms, but we could save a bit of space by making arch-specific
|
||||
@@ -3,7 +3,7 @@
|
||||
// Running these tests with the patched zsh fork:
|
||||
//
|
||||
// The suite resolves the shared test-only zsh DotSlash file at
|
||||
// `exec-server/tests/suite/zsh` via DotSlash on first use, so `dotslash` and
|
||||
// `app-server/tests/suite/zsh` via DotSlash on first use, so `dotslash` and
|
||||
// network access are required the first time the artifact is fetched.
|
||||
|
||||
use anyhow::Result;
|
||||
@@ -741,7 +741,7 @@ stream_max_retries = 0
|
||||
|
||||
fn find_test_zsh_path() -> Result<Option<std::path::PathBuf>> {
|
||||
let repo_root = codex_utils_cargo_bin::repo_root()?;
|
||||
let dotslash_zsh = repo_root.join("codex-rs/exec-server/tests/suite/zsh");
|
||||
let dotslash_zsh = repo_root.join("codex-rs/app-server/tests/suite/zsh");
|
||||
if !dotslash_zsh.is_file() {
|
||||
eprintln!(
|
||||
"skipping zsh fork test: shared zsh DotSlash file not found at {}",
|
||||
|
||||
@@ -3,7 +3,7 @@
|
||||
// This is the patched zsh fork built by
|
||||
// `.github/workflows/shell-tool-mcp.yml` for the shell-tool-mcp package.
|
||||
// Fetching the prebuilt version via DotSlash makes it easier to write
|
||||
// integration tests that exercise the zsh fork behavior in exec-server tests.
|
||||
// integration tests that exercise the zsh fork behavior in app-server tests.
|
||||
//
|
||||
// TODO(mbolin): Currently, we use a .tgz artifact that includes binaries for
|
||||
// multiple platforms, but we could save a bit of space by making arch-specific
|
||||
@@ -1,11 +0,0 @@
|
||||
load("//:defs.bzl", "codex_rust_crate")
|
||||
|
||||
codex_rust_crate(
|
||||
name = "exec-server",
|
||||
crate_name = "codex_exec_server",
|
||||
integration_deps_extra = ["//codex-rs/exec-server/tests/common:common"],
|
||||
test_tags = ["no-sandbox"],
|
||||
extra_binaries = [
|
||||
"//codex-rs/cli:codex",
|
||||
],
|
||||
)
|
||||
@@ -1,64 +0,0 @@
|
||||
[package]
|
||||
name = "codex-exec-server"
|
||||
edition.workspace = true
|
||||
license.workspace = true
|
||||
version.workspace = true
|
||||
|
||||
[[bin]]
|
||||
name = "codex-execve-wrapper"
|
||||
path = "src/bin/main_execve_wrapper.rs"
|
||||
|
||||
[[bin]]
|
||||
name = "codex-exec-mcp-server"
|
||||
path = "src/bin/main_mcp_server.rs"
|
||||
|
||||
[lib]
|
||||
name = "codex_exec_server"
|
||||
path = "src/lib.rs"
|
||||
|
||||
[lints]
|
||||
workspace = true
|
||||
|
||||
[package.metadata.cargo-shear]
|
||||
# This appears to be due to #[derive(rmcp::schemars::JsonSchema)], which
|
||||
# requires use of schemars via a macro that shear cannot detect.
|
||||
ignored = ["schemars"]
|
||||
|
||||
[dependencies]
|
||||
anyhow = { workspace = true }
|
||||
async-trait = { workspace = true }
|
||||
clap = { workspace = true, features = ["derive"] }
|
||||
codex-core = { workspace = true }
|
||||
codex-execpolicy = { workspace = true }
|
||||
codex-protocol = { workspace = true }
|
||||
codex-shell-command = { workspace = true }
|
||||
codex-shell-escalation = { workspace = true }
|
||||
rmcp = { workspace = true, default-features = false, features = [
|
||||
"auth",
|
||||
"elicitation",
|
||||
"base64",
|
||||
"client",
|
||||
"macros",
|
||||
"schemars",
|
||||
"server",
|
||||
"transport-child-process",
|
||||
"transport-streamable-http-client-reqwest",
|
||||
"transport-streamable-http-server",
|
||||
"transport-io",
|
||||
] }
|
||||
schemars = { version = "1.2.1" }
|
||||
serde = { workspace = true, features = ["derive"] }
|
||||
serde_json = { workspace = true }
|
||||
shlex = { workspace = true }
|
||||
tokio = { workspace = true, features = ["macros", "rt-multi-thread", "signal"] }
|
||||
tracing = { workspace = true }
|
||||
tracing-subscriber = { workspace = true, features = ["env-filter", "fmt"] }
|
||||
|
||||
[dev-dependencies]
|
||||
codex-protocol = { workspace = true }
|
||||
codex-utils-cargo-bin = { workspace = true }
|
||||
core_test_support = { workspace = true }
|
||||
exec_server_test_support = { workspace = true }
|
||||
maplit = { workspace = true }
|
||||
pretty_assertions = { workspace = true }
|
||||
tempfile = { workspace = true }
|
||||
@@ -1,25 +0,0 @@
|
||||
# codex-exec-server
|
||||
|
||||
This crate contains the code for two executables:
|
||||
|
||||
- `codex-exec-mcp-server` is an MCP server that provides a tool named `shell` that runs a shell command inside a sandboxed shell process. Every resulting `execve(2)` call made within that shell is intercepted and run via the executable defined by the `EXEC_WRAPPER` environment variable within the shell process. In practice, `EXEC_WRAPPER` is set to `codex-execve-wrapper`.
|
||||
- `codex-execve-wrapper` is the executable that takes the arguments to the `execve(2)` call and "escalates" it to the MCP server via a shared file descriptor (specified by the `CODEX_ESCALATE_SOCKET` environment variable) for consideration. Based on the [Codex `.rules`](https://developers.openai.com/codex/local-config#rules-preview), the MCP server replies with one of:
|
||||
- `Run`: `codex-execve-wrapper` should invoke `execve(2)` on itself to run the original command within Bash
|
||||
- `Escalate`: forward the file descriptors of the current process to the MCP server so the command can be run faithfully outside the sandbox. Because the MCP server will have the original FDs for `stdout` and `stderr`, it can write those directly. When the process completes, the MCP server forwards the exit code to `codex-execve-wrapper` so that it exits in a consistent manner.
|
||||
- `Deny`: the MCP server has declared the proposed command to be "forbidden," so `codex-execve-wrapper` will print an error to `stderr` and exit with `1`.
|
||||
|
||||
## Patched Bash
|
||||
|
||||
We carry a small patch to `execute_cmd.c` (see `patches/bash-exec-wrapper.patch`) that adds support for `EXEC_WRAPPER`. The original commit message is “add support for BASH_EXEC_WRAPPER” and the patch applies cleanly to `a8a1c2fac029404d3f42cd39f5a20f24b6e4fe4b` from https://github.com/bminor/bash. To rebuild manually:
|
||||
|
||||
```bash
|
||||
git clone https://github.com/bminor/bash && cd bash
|
||||
git checkout a8a1c2fac029404d3f42cd39f5a20f24b6e4fe4b
|
||||
git apply /path/to/patches/bash-exec-wrapper.patch
|
||||
./configure --without-bash-malloc
|
||||
make -j"$(nproc)"
|
||||
```
|
||||
|
||||
## Release workflow
|
||||
|
||||
`.github/workflows/shell-tool-mcp.yml` builds the Rust binaries, compiles the patched Bash variants, assembles the `vendor/` tree, and creates `codex-shell-tool-mcp-npm-<version>.tgz` for inclusion in the Rust GitHub Release. When the version is a stable or alpha tag, the workflow also publishes the tarball to npm using OIDC. The workflow is invoked from `rust-release.yml` so the package ships alongside other Codex artifacts.
|
||||
@@ -1,8 +0,0 @@
|
||||
#[cfg(not(unix))]
|
||||
fn main() {
|
||||
eprintln!("codex-exec-mcp-server is only implemented for UNIX");
|
||||
std::process::exit(1);
|
||||
}
|
||||
|
||||
#[cfg(unix)]
|
||||
pub use codex_exec_server::main_mcp_server as main;
|
||||
@@ -1,5 +0,0 @@
|
||||
#[cfg(unix)]
|
||||
mod unix;
|
||||
|
||||
#[cfg(unix)]
|
||||
pub use unix::*;
|
||||
@@ -1,301 +0,0 @@
|
||||
//! This is an MCP server that implements an alternative `shell` tool with fine-grained privilege
|
||||
//! escalation based on a per-exec() policy.
|
||||
//!
|
||||
//! We spawn a Bash process inside a sandbox. The Bash we spawn is patched to allow us to intercept
|
||||
//! every exec() call it makes by invoking a wrapper program and passing in the arguments it would
|
||||
//! have passed to exec(). The Bash process (and its descendants) inherit a communication socket
|
||||
//! from us, and we give its fd number in the CODEX_ESCALATE_SOCKET environment variable.
|
||||
//!
|
||||
//! When we intercept an exec() call, we send a message over the socket back to the main
|
||||
//! MCP process. The MCP process can then decide whether to allow the exec() call to proceed
|
||||
//! or to escalate privileges and run the requested command with elevated permissions. In the
|
||||
//! latter case, we send a message back to the child requesting that it forward its open FDs to us.
|
||||
//! We then execute the requested command on its behalf, patching in the forwarded FDs.
|
||||
//!
|
||||
//!
|
||||
//! ### The privilege escalation flow
|
||||
//!
|
||||
//! Child MCP Bash Escalate Helper
|
||||
//! |
|
||||
//! o----->o
|
||||
//! | |
|
||||
//! | o--(exec)-->o
|
||||
//! | | |
|
||||
//! |o<-(EscalateReq)--o
|
||||
//! || | |
|
||||
//! |o--(Escalate)---->o
|
||||
//! || | |
|
||||
//! |o<---------(fds)--o
|
||||
//! || | |
|
||||
//! o<-----o | |
|
||||
//! | || | |
|
||||
//! x----->o | |
|
||||
//! || | |
|
||||
//! |x--(exit code)--->o
|
||||
//! | | |
|
||||
//! | o<--(exit)--x
|
||||
//! | |
|
||||
//! o<-----x
|
||||
//!
|
||||
//! ### The non-escalation flow
|
||||
//!
|
||||
//! MCP Bash Escalate Helper Child
|
||||
//! |
|
||||
//! o----->o
|
||||
//! | |
|
||||
//! | o--(exec)-->o
|
||||
//! | | |
|
||||
//! |o<-(EscalateReq)--o
|
||||
//! || | |
|
||||
//! |o-(Run)---------->o
|
||||
//! | | |
|
||||
//! | | x--(exec)-->o
|
||||
//! | | |
|
||||
//! | o<--------------(exit)--x
|
||||
//! | |
|
||||
//! o<-----x
|
||||
//!
|
||||
use std::path::Path;
|
||||
use std::path::PathBuf;
|
||||
use std::sync::Arc;
|
||||
|
||||
use anyhow::Context as _;
|
||||
use clap::Parser;
|
||||
use codex_core::config::find_codex_home;
|
||||
use codex_core::sandboxing::SandboxPermissions;
|
||||
use codex_execpolicy::Decision;
|
||||
use codex_execpolicy::Policy;
|
||||
use codex_execpolicy::RuleMatch;
|
||||
use codex_shell_command::is_dangerous_command::command_might_be_dangerous;
|
||||
use codex_shell_escalation as shell_escalation;
|
||||
use rmcp::ErrorData as McpError;
|
||||
use tokio::sync::RwLock;
|
||||
use tracing_subscriber::EnvFilter;
|
||||
use tracing_subscriber::{self};
|
||||
|
||||
use crate::unix::mcp_escalation_policy::ExecPolicyOutcome;
|
||||
|
||||
mod mcp;
|
||||
mod mcp_escalation_policy;
|
||||
|
||||
pub use mcp::ExecResult;
|
||||
|
||||
/// Default value of --execve option relative to the current executable.
|
||||
/// Note this must match the name of the binary as specified in Cargo.toml.
|
||||
const CODEX_EXECVE_WRAPPER_EXE_NAME: &str = "codex-execve-wrapper";
|
||||
|
||||
#[derive(Parser)]
|
||||
#[clap(version)]
|
||||
struct McpServerCli {
|
||||
/// Executable to delegate execve(2) calls to in Bash.
|
||||
#[arg(long = "execve")]
|
||||
execve_wrapper: Option<PathBuf>,
|
||||
|
||||
/// Path to Bash that has been patched to support execve() wrapping.
|
||||
#[arg(long = "bash")]
|
||||
bash_path: Option<PathBuf>,
|
||||
|
||||
/// Preserve program paths when applying execpolicy (e.g., keep /usr/bin/echo instead of echo).
|
||||
/// Note: this does change the actual program being run.
|
||||
#[arg(long)]
|
||||
preserve_program_paths: bool,
|
||||
}
|
||||
|
||||
#[tokio::main]
|
||||
pub async fn main_mcp_server() -> anyhow::Result<()> {
|
||||
tracing_subscriber::fmt()
|
||||
.with_env_filter(EnvFilter::from_default_env())
|
||||
.with_writer(std::io::stderr)
|
||||
.with_ansi(false)
|
||||
.init();
|
||||
|
||||
let cli = McpServerCli::parse();
|
||||
let execve_wrapper = match cli.execve_wrapper {
|
||||
Some(path) => path,
|
||||
None => {
|
||||
let cwd = std::env::current_exe()?;
|
||||
cwd.parent()
|
||||
.map(|p| p.join(CODEX_EXECVE_WRAPPER_EXE_NAME))
|
||||
.ok_or_else(|| {
|
||||
anyhow::anyhow!("failed to determine execve wrapper path from current exe")
|
||||
})?
|
||||
}
|
||||
};
|
||||
let bash_path = match cli.bash_path {
|
||||
Some(path) => path,
|
||||
None => mcp::get_bash_path()?,
|
||||
};
|
||||
let policy = Arc::new(RwLock::new(load_exec_policy().await?));
|
||||
|
||||
tracing::info!("Starting MCP server");
|
||||
let service = mcp::serve(
|
||||
bash_path,
|
||||
execve_wrapper,
|
||||
policy,
|
||||
cli.preserve_program_paths,
|
||||
)
|
||||
.await
|
||||
.inspect_err(|e| {
|
||||
tracing::error!("serving error: {:?}", e);
|
||||
})?;
|
||||
|
||||
service.waiting().await?;
|
||||
Ok(())
|
||||
}
|
||||
|
||||
#[derive(Parser)]
|
||||
pub struct ExecveWrapperCli {
|
||||
file: String,
|
||||
|
||||
#[arg(trailing_var_arg = true)]
|
||||
argv: Vec<String>,
|
||||
}
|
||||
|
||||
#[tokio::main]
|
||||
pub async fn main_execve_wrapper() -> anyhow::Result<()> {
|
||||
tracing_subscriber::fmt()
|
||||
.with_env_filter(EnvFilter::from_default_env())
|
||||
.with_writer(std::io::stderr)
|
||||
.with_ansi(false)
|
||||
.init();
|
||||
|
||||
let ExecveWrapperCli { file, argv } = ExecveWrapperCli::parse();
|
||||
let exit_code = shell_escalation::run(file, argv).await?;
|
||||
std::process::exit(exit_code);
|
||||
}
|
||||
|
||||
/// Decide how to handle an exec() call for a specific command.
|
||||
///
|
||||
/// `file` is the absolute, canonical path to the executable to run, i.e. the first arg to exec.
|
||||
/// `argv` is the argv, including the program name (`argv[0]`).
|
||||
pub(crate) fn evaluate_exec_policy(
|
||||
policy: &Policy,
|
||||
file: &Path,
|
||||
argv: &[String],
|
||||
preserve_program_paths: bool,
|
||||
) -> Result<ExecPolicyOutcome, McpError> {
|
||||
let program_name = format_program_name(file, preserve_program_paths).ok_or_else(|| {
|
||||
McpError::internal_error(
|
||||
format!("failed to format program name for `{}`", file.display()),
|
||||
None,
|
||||
)
|
||||
})?;
|
||||
let command: Vec<String> = std::iter::once(program_name)
|
||||
// Use the normalized program name instead of argv[0].
|
||||
.chain(argv.iter().skip(1).cloned())
|
||||
.collect();
|
||||
let evaluation = policy.check(&command, &|cmd| {
|
||||
if command_might_be_dangerous(cmd) {
|
||||
Decision::Prompt
|
||||
} else {
|
||||
Decision::Allow
|
||||
}
|
||||
});
|
||||
|
||||
// decisions driven by policy should run outside sandbox
|
||||
let decision_driven_by_policy = evaluation.matched_rules.iter().any(|rule_match| {
|
||||
!matches!(rule_match, RuleMatch::HeuristicsRuleMatch { .. })
|
||||
&& rule_match.decision() == evaluation.decision
|
||||
});
|
||||
|
||||
let sandbox_permissions = if decision_driven_by_policy {
|
||||
SandboxPermissions::RequireEscalated
|
||||
} else {
|
||||
SandboxPermissions::UseDefault
|
||||
};
|
||||
|
||||
Ok(match evaluation.decision {
|
||||
Decision::Forbidden => ExecPolicyOutcome::Forbidden,
|
||||
Decision::Prompt => ExecPolicyOutcome::Prompt {
|
||||
sandbox_permissions,
|
||||
},
|
||||
Decision::Allow => ExecPolicyOutcome::Allow {
|
||||
sandbox_permissions,
|
||||
},
|
||||
})
|
||||
}
|
||||
|
||||
fn format_program_name(path: &Path, preserve_program_paths: bool) -> Option<String> {
|
||||
if preserve_program_paths {
|
||||
path.to_str().map(str::to_string)
|
||||
} else {
|
||||
path.file_name()?.to_str().map(str::to_string)
|
||||
}
|
||||
}
|
||||
|
||||
async fn load_exec_policy() -> anyhow::Result<Policy> {
|
||||
let codex_home = find_codex_home().context("failed to resolve codex_home for execpolicy")?;
|
||||
|
||||
// TODO(mbolin): At a minimum, `cwd` should be configurable via
|
||||
// `codex/sandbox-state/update` or some other custom MCP call.
|
||||
let cwd = None;
|
||||
let cli_overrides = Vec::new();
|
||||
let overrides = codex_core::config_loader::LoaderOverrides::default();
|
||||
let config_layer_stack = codex_core::config_loader::load_config_layers_state(
|
||||
&codex_home,
|
||||
cwd,
|
||||
&cli_overrides,
|
||||
overrides,
|
||||
codex_core::config_loader::CloudRequirementsLoader::default(),
|
||||
)
|
||||
.await?;
|
||||
|
||||
codex_core::load_exec_policy(&config_layer_stack)
|
||||
.await
|
||||
.map_err(anyhow::Error::from)
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use super::*;
|
||||
use codex_core::sandboxing::SandboxPermissions;
|
||||
use codex_execpolicy::Decision;
|
||||
use codex_execpolicy::Policy;
|
||||
use pretty_assertions::assert_eq;
|
||||
use std::path::Path;
|
||||
|
||||
#[test]
|
||||
fn evaluate_exec_policy_uses_heuristics_for_dangerous_commands() {
|
||||
let policy = Policy::empty();
|
||||
let file = Path::new("/bin/rm");
|
||||
let argv = vec!["rm".to_string(), "-rf".to_string(), "/".to_string()];
|
||||
|
||||
let outcome = evaluate_exec_policy(&policy, file, &argv, false).expect("policy evaluation");
|
||||
|
||||
assert_eq!(
|
||||
outcome,
|
||||
ExecPolicyOutcome::Prompt {
|
||||
sandbox_permissions: SandboxPermissions::UseDefault
|
||||
}
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn evaluate_exec_policy_respects_preserve_program_paths() {
|
||||
let mut policy = Policy::empty();
|
||||
policy
|
||||
.add_prefix_rule(
|
||||
&[
|
||||
"/usr/local/bin/custom-cmd".to_string(),
|
||||
"--flag".to_string(),
|
||||
],
|
||||
Decision::Allow,
|
||||
)
|
||||
.expect("policy rule should be added");
|
||||
let file = Path::new("/usr/local/bin/custom-cmd");
|
||||
let argv = vec![
|
||||
"/usr/local/bin/custom-cmd".to_string(),
|
||||
"--flag".to_string(),
|
||||
"value".to_string(),
|
||||
];
|
||||
|
||||
let outcome = evaluate_exec_policy(&policy, file, &argv, true).expect("policy evaluation");
|
||||
|
||||
assert_eq!(
|
||||
outcome,
|
||||
ExecPolicyOutcome::Allow {
|
||||
sandbox_permissions: SandboxPermissions::RequireEscalated
|
||||
}
|
||||
);
|
||||
}
|
||||
}
|
||||
@@ -1,307 +0,0 @@
|
||||
use std::path::PathBuf;
|
||||
use std::sync::Arc;
|
||||
use std::time::Duration;
|
||||
|
||||
use anyhow::Context as _;
|
||||
use anyhow::Result;
|
||||
use codex_core::MCP_SANDBOX_STATE_CAPABILITY;
|
||||
use codex_core::MCP_SANDBOX_STATE_METHOD;
|
||||
use codex_core::SandboxState;
|
||||
use codex_execpolicy::Policy;
|
||||
use codex_protocol::protocol::SandboxPolicy;
|
||||
use codex_shell_escalation::EscalationPolicyFactory;
|
||||
use codex_shell_escalation::run_escalate_server;
|
||||
use rmcp::ErrorData as McpError;
|
||||
use rmcp::RoleServer;
|
||||
use rmcp::ServerHandler;
|
||||
use rmcp::ServiceExt;
|
||||
use rmcp::handler::server::router::tool::ToolRouter;
|
||||
use rmcp::handler::server::wrapper::Parameters;
|
||||
use rmcp::model::CustomRequest;
|
||||
use rmcp::model::CustomResult;
|
||||
use rmcp::model::*;
|
||||
use rmcp::service::RequestContext;
|
||||
use rmcp::service::RunningService;
|
||||
use rmcp::tool;
|
||||
use rmcp::tool_handler;
|
||||
use rmcp::tool_router;
|
||||
use rmcp::transport::stdio;
|
||||
use serde_json::json;
|
||||
use tokio::sync::RwLock;
|
||||
|
||||
use crate::unix::mcp_escalation_policy::McpEscalationPolicy;
|
||||
|
||||
/// Path to our patched bash.
|
||||
const CODEX_BASH_PATH_ENV_VAR: &str = "CODEX_BASH_PATH";
|
||||
|
||||
const SANDBOX_STATE_CAPABILITY_VERSION: &str = "1.0.0";
|
||||
|
||||
pub(crate) fn get_bash_path() -> Result<PathBuf> {
|
||||
std::env::var(CODEX_BASH_PATH_ENV_VAR)
|
||||
.map(PathBuf::from)
|
||||
.context(format!("{CODEX_BASH_PATH_ENV_VAR} must be set"))
|
||||
}
|
||||
|
||||
#[derive(Debug, serde::Serialize, serde::Deserialize)]
|
||||
pub struct ExecResult {
|
||||
pub exit_code: i32,
|
||||
pub output: String,
|
||||
pub duration: Duration,
|
||||
pub timed_out: bool,
|
||||
}
|
||||
|
||||
impl From<codex_shell_escalation::ExecResult> for ExecResult {
|
||||
fn from(result: codex_shell_escalation::ExecResult) -> Self {
|
||||
Self {
|
||||
exit_code: result.exit_code,
|
||||
output: result.output,
|
||||
duration: result.duration,
|
||||
timed_out: result.timed_out,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Clone)]
|
||||
pub struct ExecTool {
|
||||
tool_router: ToolRouter<ExecTool>,
|
||||
bash_path: PathBuf,
|
||||
execve_wrapper: PathBuf,
|
||||
policy: Arc<RwLock<Policy>>,
|
||||
preserve_program_paths: bool,
|
||||
sandbox_state: Arc<RwLock<Option<SandboxState>>>,
|
||||
}
|
||||
|
||||
#[derive(Debug, serde::Serialize, serde::Deserialize, rmcp::schemars::JsonSchema)]
|
||||
pub struct ExecParams {
|
||||
/// The bash string to execute.
|
||||
pub command: String,
|
||||
/// The working directory to execute the command in. Must be an absolute path.
|
||||
pub workdir: String,
|
||||
/// The timeout for the command in milliseconds.
|
||||
pub timeout_ms: Option<u64>,
|
||||
/// Launch Bash with -lc instead of -c: defaults to true.
|
||||
pub login: Option<bool>,
|
||||
}
|
||||
|
||||
impl From<ExecParams> for codex_shell_escalation::ExecParams {
|
||||
fn from(inner: ExecParams) -> Self {
|
||||
Self {
|
||||
command: inner.command,
|
||||
workdir: inner.workdir,
|
||||
timeout_ms: inner.timeout_ms,
|
||||
login: inner.login,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
struct McpEscalationPolicyFactory {
|
||||
context: RequestContext<RoleServer>,
|
||||
preserve_program_paths: bool,
|
||||
}
|
||||
|
||||
impl EscalationPolicyFactory for McpEscalationPolicyFactory {
|
||||
type Policy = McpEscalationPolicy;
|
||||
|
||||
fn create_policy(
|
||||
&self,
|
||||
policy: Arc<RwLock<Policy>>,
|
||||
stopwatch: codex_shell_escalation::Stopwatch,
|
||||
) -> Self::Policy {
|
||||
McpEscalationPolicy::new(
|
||||
policy,
|
||||
self.context.clone(),
|
||||
stopwatch,
|
||||
self.preserve_program_paths,
|
||||
)
|
||||
}
|
||||
}
|
||||
|
||||
#[tool_router]
|
||||
impl ExecTool {
|
||||
pub fn new(
|
||||
bash_path: PathBuf,
|
||||
execve_wrapper: PathBuf,
|
||||
policy: Arc<RwLock<Policy>>,
|
||||
preserve_program_paths: bool,
|
||||
) -> Self {
|
||||
Self {
|
||||
tool_router: Self::tool_router(),
|
||||
bash_path,
|
||||
execve_wrapper,
|
||||
policy,
|
||||
preserve_program_paths,
|
||||
sandbox_state: Arc::new(RwLock::new(None)),
|
||||
}
|
||||
}
|
||||
|
||||
/// Runs a shell command and returns its output. You MUST provide the workdir as an absolute path.
|
||||
#[tool]
|
||||
async fn shell(
|
||||
&self,
|
||||
context: RequestContext<RoleServer>,
|
||||
Parameters(params): Parameters<ExecParams>,
|
||||
) -> Result<CallToolResult, McpError> {
|
||||
let effective_timeout = Duration::from_millis(
|
||||
params
|
||||
.timeout_ms
|
||||
.unwrap_or(codex_core::exec::DEFAULT_EXEC_COMMAND_TIMEOUT_MS),
|
||||
);
|
||||
let sandbox_state =
|
||||
self.sandbox_state
|
||||
.read()
|
||||
.await
|
||||
.clone()
|
||||
.unwrap_or_else(|| SandboxState {
|
||||
sandbox_policy: SandboxPolicy::new_read_only_policy(),
|
||||
codex_linux_sandbox_exe: None,
|
||||
sandbox_cwd: PathBuf::from(¶ms.workdir),
|
||||
use_linux_sandbox_bwrap: false,
|
||||
});
|
||||
let result = run_escalate_server(
|
||||
params.into(),
|
||||
&sandbox_state,
|
||||
&self.bash_path,
|
||||
&self.execve_wrapper,
|
||||
self.policy.clone(),
|
||||
McpEscalationPolicyFactory {
|
||||
context,
|
||||
preserve_program_paths: self.preserve_program_paths,
|
||||
},
|
||||
effective_timeout,
|
||||
)
|
||||
.await
|
||||
.map_err(|e| McpError::internal_error(e.to_string(), None))?;
|
||||
Ok(CallToolResult::success(vec![Content::json(
|
||||
ExecResult::from(result),
|
||||
)?]))
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Default)]
|
||||
pub struct CodexSandboxStateUpdateMethod;
|
||||
|
||||
impl rmcp::model::ConstString for CodexSandboxStateUpdateMethod {
|
||||
const VALUE: &'static str = MCP_SANDBOX_STATE_METHOD;
|
||||
}
|
||||
|
||||
#[tool_handler]
|
||||
impl ServerHandler for ExecTool {
|
||||
fn get_info(&self) -> ServerInfo {
|
||||
let mut experimental_capabilities = ExperimentalCapabilities::new();
|
||||
let mut sandbox_state_capability = JsonObject::new();
|
||||
sandbox_state_capability.insert(
|
||||
"version".to_string(),
|
||||
serde_json::Value::String(SANDBOX_STATE_CAPABILITY_VERSION.to_string()),
|
||||
);
|
||||
experimental_capabilities.insert(
|
||||
MCP_SANDBOX_STATE_CAPABILITY.to_string(),
|
||||
sandbox_state_capability,
|
||||
);
|
||||
ServerInfo {
|
||||
protocol_version: ProtocolVersion::V_2025_06_18,
|
||||
capabilities: ServerCapabilities::builder()
|
||||
.enable_tools()
|
||||
.enable_experimental_with(experimental_capabilities)
|
||||
.build(),
|
||||
server_info: Implementation::from_build_env(),
|
||||
instructions: Some(
|
||||
"This server provides a tool to execute shell commands and return their output."
|
||||
.to_string(),
|
||||
),
|
||||
}
|
||||
}
|
||||
|
||||
async fn initialize(
|
||||
&self,
|
||||
_request: InitializeRequestParams,
|
||||
_context: RequestContext<RoleServer>,
|
||||
) -> Result<InitializeResult, McpError> {
|
||||
Ok(self.get_info())
|
||||
}
|
||||
|
||||
async fn on_custom_request(
|
||||
&self,
|
||||
request: CustomRequest,
|
||||
_context: rmcp::service::RequestContext<rmcp::RoleServer>,
|
||||
) -> Result<CustomResult, McpError> {
|
||||
let CustomRequest { method, params, .. } = request;
|
||||
if method != MCP_SANDBOX_STATE_METHOD {
|
||||
return Err(McpError::method_not_found::<CodexSandboxStateUpdateMethod>());
|
||||
}
|
||||
|
||||
let Some(params) = params else {
|
||||
return Err(McpError::invalid_params(
|
||||
"missing params for sandbox state request".to_string(),
|
||||
None,
|
||||
));
|
||||
};
|
||||
|
||||
let Ok(sandbox_state) = serde_json::from_value::<SandboxState>(params.clone()) else {
|
||||
return Err(McpError::invalid_params(
|
||||
"failed to deserialize sandbox state".to_string(),
|
||||
Some(params),
|
||||
));
|
||||
};
|
||||
|
||||
*self.sandbox_state.write().await = Some(sandbox_state);
|
||||
|
||||
Ok(CustomResult::new(json!({})))
|
||||
}
|
||||
}
|
||||
|
||||
pub(crate) async fn serve(
|
||||
bash_path: PathBuf,
|
||||
execve_wrapper: PathBuf,
|
||||
policy: Arc<RwLock<Policy>>,
|
||||
preserve_program_paths: bool,
|
||||
) -> Result<RunningService<RoleServer, ExecTool>, rmcp::service::ServerInitializeError> {
|
||||
let tool = ExecTool::new(bash_path, execve_wrapper, policy, preserve_program_paths);
|
||||
tool.serve(stdio()).await
|
||||
}
|
||||
|
||||
#[cfg(test)]
mod tests {
    use super::*;
    use pretty_assertions::assert_eq;
    use serde_json::json;

    /// Guards the JSON schema generated for `ExecParams`: `command` and
    /// `workdir` must be required, while `login` and `timeout_ms` must stay
    /// optional (nullable) despite the serde derives.
    #[test]
    fn exec_params_json_schema_matches_expected() {
        let expected = json!({
            "$schema": "https://json-schema.org/draft/2020-12/schema",
            "title": "ExecParams",
            "type": "object",
            "properties": {
                "command": {
                    "description": "The bash string to execute.",
                    "type": "string"
                },
                "login": {
                    "description": "Launch Bash with -lc instead of -c: defaults to true.",
                    "type": ["boolean", "null"]
                },
                "timeout_ms": {
                    "description": "The timeout for the command in milliseconds.",
                    "format": "uint64",
                    "minimum": 0,
                    "type": ["integer", "null"]
                },
                "workdir": {
                    "description":
                        "The working directory to execute the command in. Must be an absolute path.",
                    "type": "string"
                }
            },
            "required": ["command", "workdir"]
        });

        let generated = rmcp::schemars::schema_for!(ExecParams);
        let actual = serde_json::to_value(generated).expect("schema should serialize");

        assert_eq!(actual, expected);
    }
}
|
||||
@@ -1,150 +0,0 @@
|
||||
use std::path::Path;
|
||||
|
||||
use codex_core::sandboxing::SandboxPermissions;
|
||||
use codex_execpolicy::Policy;
|
||||
use codex_shell_escalation::EscalateAction;
|
||||
use codex_shell_escalation::EscalationPolicy;
|
||||
use codex_shell_escalation::Stopwatch;
|
||||
use rmcp::ErrorData as McpError;
|
||||
use rmcp::RoleServer;
|
||||
use rmcp::model::CreateElicitationRequestParams;
|
||||
use rmcp::model::CreateElicitationResult;
|
||||
use rmcp::model::ElicitationAction;
|
||||
use rmcp::model::ElicitationSchema;
|
||||
use rmcp::service::RequestContext;
|
||||
use shlex::try_join;
|
||||
use std::sync::Arc;
|
||||
use tokio::sync::RwLock;
|
||||
|
||||
#[derive(Debug, PartialEq, Eq)]
|
||||
pub(crate) enum ExecPolicyOutcome {
|
||||
Allow {
|
||||
sandbox_permissions: SandboxPermissions,
|
||||
},
|
||||
Prompt {
|
||||
sandbox_permissions: SandboxPermissions,
|
||||
},
|
||||
Forbidden,
|
||||
}
|
||||
|
||||
/// ExecPolicy with access to the MCP RequestContext so that it can leverage
|
||||
/// elicitations.
|
||||
pub(crate) struct McpEscalationPolicy {
|
||||
/// In-memory execpolicy rules that drive how to handle an exec() call.
|
||||
policy: Arc<RwLock<Policy>>,
|
||||
context: RequestContext<RoleServer>,
|
||||
stopwatch: Stopwatch,
|
||||
preserve_program_paths: bool,
|
||||
}
|
||||
|
||||
impl McpEscalationPolicy {
|
||||
pub(crate) fn new(
|
||||
policy: Arc<RwLock<Policy>>,
|
||||
context: RequestContext<RoleServer>,
|
||||
stopwatch: Stopwatch,
|
||||
preserve_program_paths: bool,
|
||||
) -> Self {
|
||||
Self {
|
||||
policy,
|
||||
context,
|
||||
stopwatch,
|
||||
preserve_program_paths,
|
||||
}
|
||||
}
|
||||
|
||||
async fn prompt(
|
||||
&self,
|
||||
file: &Path,
|
||||
argv: &[String],
|
||||
workdir: &Path,
|
||||
context: RequestContext<RoleServer>,
|
||||
) -> Result<CreateElicitationResult, McpError> {
|
||||
let args = try_join(argv.iter().skip(1).map(String::as_str)).unwrap_or_default();
|
||||
let command = if args.is_empty() {
|
||||
file.display().to_string()
|
||||
} else {
|
||||
format!("{} {}", file.display(), args)
|
||||
};
|
||||
self.stopwatch
|
||||
.pause_for(async {
|
||||
context
|
||||
.peer
|
||||
.create_elicitation(CreateElicitationRequestParams::FormElicitationParams {
|
||||
meta: None,
|
||||
message: format!(
|
||||
"Allow agent to run `{command}` in `{}`?",
|
||||
workdir.display()
|
||||
),
|
||||
requested_schema: ElicitationSchema::builder()
|
||||
.title("Execution Permission Request")
|
||||
.optional_string_with("reason", |schema| {
|
||||
schema.description(
|
||||
"Optional reason for allowing or denying execution",
|
||||
)
|
||||
})
|
||||
.build()
|
||||
.map_err(|e| {
|
||||
McpError::internal_error(
|
||||
format!("failed to build elicitation schema: {e}"),
|
||||
None,
|
||||
)
|
||||
})?,
|
||||
})
|
||||
.await
|
||||
.map_err(|e| McpError::internal_error(e.to_string(), None))
|
||||
})
|
||||
.await
|
||||
}
|
||||
}
|
||||
|
||||
#[async_trait::async_trait]
|
||||
impl EscalationPolicy for McpEscalationPolicy {
|
||||
async fn determine_action(
|
||||
&self,
|
||||
file: &Path,
|
||||
argv: &[String],
|
||||
workdir: &Path,
|
||||
) -> anyhow::Result<EscalateAction> {
|
||||
let policy = self.policy.read().await;
|
||||
let outcome =
|
||||
crate::unix::evaluate_exec_policy(&policy, file, argv, self.preserve_program_paths)?;
|
||||
let action = match outcome {
|
||||
ExecPolicyOutcome::Allow {
|
||||
sandbox_permissions,
|
||||
} => {
|
||||
if sandbox_permissions.requires_escalated_permissions() {
|
||||
EscalateAction::Escalate
|
||||
} else {
|
||||
EscalateAction::Run
|
||||
}
|
||||
}
|
||||
ExecPolicyOutcome::Prompt {
|
||||
sandbox_permissions,
|
||||
} => {
|
||||
let result = self
|
||||
.prompt(file, argv, workdir, self.context.clone())
|
||||
.await?;
|
||||
// TODO: Extract reason from `result.content`.
|
||||
match result.action {
|
||||
ElicitationAction::Accept => {
|
||||
if sandbox_permissions.requires_escalated_permissions() {
|
||||
EscalateAction::Escalate
|
||||
} else {
|
||||
EscalateAction::Run
|
||||
}
|
||||
}
|
||||
ElicitationAction::Decline => EscalateAction::Deny {
|
||||
reason: Some("User declined execution".to_string()),
|
||||
},
|
||||
ElicitationAction::Cancel => EscalateAction::Deny {
|
||||
reason: Some("User cancelled execution".to_string()),
|
||||
},
|
||||
}
|
||||
}
|
||||
ExecPolicyOutcome::Forbidden => EscalateAction::Deny {
|
||||
reason: Some("Execution forbidden by policy".to_string()),
|
||||
},
|
||||
};
|
||||
Ok(action)
|
||||
}
|
||||
}
|
||||
@@ -1,3 +0,0 @@
|
||||
// Single integration test binary that aggregates all test modules.
|
||||
// The submodules live in `tests/suite/`.
|
||||
mod suite;
|
||||
@@ -1,7 +0,0 @@
|
||||
load("//:defs.bzl", "codex_rust_crate")
|
||||
|
||||
codex_rust_crate(
|
||||
name = "common",
|
||||
crate_name = "exec_server_test_support",
|
||||
crate_srcs = glob(["*.rs"]),
|
||||
)
|
||||
@@ -1,17 +0,0 @@
|
||||
[package]
|
||||
name = "exec_server_test_support"
|
||||
version.workspace = true
|
||||
edition.workspace = true
|
||||
license.workspace = true
|
||||
|
||||
[lib]
|
||||
path = "lib.rs"
|
||||
|
||||
[dependencies]
|
||||
anyhow = { workspace = true }
|
||||
codex-core = { workspace = true }
|
||||
codex-protocol = { workspace = true }
|
||||
codex-utils-cargo-bin = { workspace = true }
|
||||
rmcp = { workspace = true }
|
||||
serde_json = { workspace = true }
|
||||
tokio = { workspace = true }
|
||||
@@ -1,201 +0,0 @@
|
||||
use codex_core::MCP_SANDBOX_STATE_METHOD;
|
||||
use codex_core::SandboxState;
|
||||
use codex_protocol::protocol::SandboxPolicy;
|
||||
use codex_utils_cargo_bin::find_resource;
|
||||
use rmcp::ClientHandler;
|
||||
use rmcp::ErrorData as McpError;
|
||||
use rmcp::RoleClient;
|
||||
use rmcp::Service;
|
||||
use rmcp::model::ClientCapabilities;
|
||||
use rmcp::model::ClientInfo;
|
||||
use rmcp::model::ClientRequest;
|
||||
use rmcp::model::CreateElicitationRequestParams;
|
||||
use rmcp::model::CreateElicitationResult;
|
||||
use rmcp::model::CustomRequest;
|
||||
use rmcp::model::ElicitationAction;
|
||||
use rmcp::model::ServerResult;
|
||||
use rmcp::service::RunningService;
|
||||
use rmcp::transport::ConfigureCommandExt;
|
||||
use rmcp::transport::TokioChildProcess;
|
||||
use serde_json::json;
|
||||
use std::collections::HashSet;
|
||||
use std::path::Path;
|
||||
use std::path::PathBuf;
|
||||
use std::process::Stdio;
|
||||
use std::sync::Arc;
|
||||
use std::sync::Mutex;
|
||||
use tokio::process::Command;
|
||||
|
||||
pub async fn create_transport<P>(
|
||||
codex_home: P,
|
||||
dotslash_cache: P,
|
||||
) -> anyhow::Result<TokioChildProcess>
|
||||
where
|
||||
P: AsRef<Path>,
|
||||
{
|
||||
// `bash` is a test resource rather than a binary target, so we must use
|
||||
// `find_resource!` to locate it instead of `cargo_bin()`.
|
||||
let bash = find_resource!("../suite/bash")?;
|
||||
|
||||
// Need to ensure the artifact associated with the bash DotSlash file is
|
||||
// available before it is run in a read-only sandbox.
|
||||
let status = Command::new("dotslash")
|
||||
.arg("--")
|
||||
.arg("fetch")
|
||||
.arg(bash.clone())
|
||||
.env("DOTSLASH_CACHE", dotslash_cache.as_ref())
|
||||
.status()
|
||||
.await?;
|
||||
assert!(status.success(), "dotslash fetch failed: {status:?}");
|
||||
|
||||
create_transport_with_shell_path(codex_home, dotslash_cache, bash).await
|
||||
}
|
||||
|
||||
pub async fn create_transport_with_shell_path<P, Q, R>(
|
||||
codex_home: P,
|
||||
dotslash_cache: Q,
|
||||
shell_path: R,
|
||||
) -> anyhow::Result<TokioChildProcess>
|
||||
where
|
||||
P: AsRef<Path>,
|
||||
Q: AsRef<Path>,
|
||||
R: AsRef<Path>,
|
||||
{
|
||||
let mcp_executable = codex_utils_cargo_bin::cargo_bin("codex-exec-mcp-server")?;
|
||||
let execve_wrapper = codex_utils_cargo_bin::cargo_bin("codex-execve-wrapper")?;
|
||||
|
||||
let transport = TokioChildProcess::new(Command::new(&mcp_executable).configure(|cmd| {
|
||||
cmd.arg("--bash").arg(shell_path.as_ref());
|
||||
cmd.arg("--execve").arg(&execve_wrapper);
|
||||
cmd.env("CODEX_HOME", codex_home.as_ref());
|
||||
cmd.env("DOTSLASH_CACHE", dotslash_cache.as_ref());
|
||||
|
||||
// Important: pipe stdio so rmcp can speak JSON-RPC over stdin/stdout
|
||||
cmd.stdin(Stdio::piped());
|
||||
cmd.stdout(Stdio::piped());
|
||||
|
||||
// Optional but very helpful while debugging:
|
||||
cmd.stderr(Stdio::inherit());
|
||||
}))?;
|
||||
|
||||
Ok(transport)
|
||||
}
|
||||
|
||||
pub async fn write_default_execpolicy<P>(policy: &str, codex_home: P) -> anyhow::Result<()>
|
||||
where
|
||||
P: AsRef<Path>,
|
||||
{
|
||||
let policy_dir = codex_home.as_ref().join("rules");
|
||||
tokio::fs::create_dir_all(&policy_dir).await?;
|
||||
tokio::fs::write(policy_dir.join("default.rules"), policy).await?;
|
||||
Ok(())
|
||||
}
|
||||
|
||||
pub async fn notify_readable_sandbox<P, S>(
|
||||
sandbox_cwd: P,
|
||||
codex_linux_sandbox_exe: Option<PathBuf>,
|
||||
service: &RunningService<RoleClient, S>,
|
||||
) -> anyhow::Result<ServerResult>
|
||||
where
|
||||
P: AsRef<Path>,
|
||||
S: Service<RoleClient> + ClientHandler,
|
||||
{
|
||||
let sandbox_state = SandboxState {
|
||||
sandbox_policy: SandboxPolicy::new_read_only_policy(),
|
||||
codex_linux_sandbox_exe,
|
||||
sandbox_cwd: sandbox_cwd.as_ref().to_path_buf(),
|
||||
use_linux_sandbox_bwrap: false,
|
||||
};
|
||||
send_sandbox_state_update(sandbox_state, service).await
|
||||
}
|
||||
|
||||
pub async fn notify_writable_sandbox_only_one_folder<P, S>(
|
||||
writable_folder: P,
|
||||
codex_linux_sandbox_exe: Option<PathBuf>,
|
||||
service: &RunningService<RoleClient, S>,
|
||||
) -> anyhow::Result<ServerResult>
|
||||
where
|
||||
P: AsRef<Path>,
|
||||
S: Service<RoleClient> + ClientHandler,
|
||||
{
|
||||
let sandbox_state = SandboxState {
|
||||
sandbox_policy: SandboxPolicy::WorkspaceWrite {
|
||||
// Note that sandbox_cwd will already be included as a writable root
|
||||
// when the sandbox policy is expanded.
|
||||
writable_roots: vec![],
|
||||
read_only_access: Default::default(),
|
||||
network_access: false,
|
||||
// Disable writes to temp dir because this is a test, so
|
||||
// writable_folder is likely also under /tmp and we want to be
|
||||
// strict about what is writable.
|
||||
exclude_tmpdir_env_var: true,
|
||||
exclude_slash_tmp: true,
|
||||
},
|
||||
codex_linux_sandbox_exe,
|
||||
sandbox_cwd: writable_folder.as_ref().to_path_buf(),
|
||||
use_linux_sandbox_bwrap: false,
|
||||
};
|
||||
send_sandbox_state_update(sandbox_state, service).await
|
||||
}
|
||||
|
||||
async fn send_sandbox_state_update<S>(
|
||||
sandbox_state: SandboxState,
|
||||
service: &RunningService<RoleClient, S>,
|
||||
) -> anyhow::Result<ServerResult>
|
||||
where
|
||||
S: Service<RoleClient> + ClientHandler,
|
||||
{
|
||||
let response = service
|
||||
.send_request(ClientRequest::CustomRequest(CustomRequest::new(
|
||||
MCP_SANDBOX_STATE_METHOD,
|
||||
Some(serde_json::to_value(sandbox_state)?),
|
||||
)))
|
||||
.await?;
|
||||
Ok(response)
|
||||
}
|
||||
|
||||
pub struct InteractiveClient {
|
||||
pub elicitations_to_accept: HashSet<String>,
|
||||
pub elicitation_requests: Arc<Mutex<Vec<CreateElicitationRequestParams>>>,
|
||||
}
|
||||
|
||||
impl ClientHandler for InteractiveClient {
|
||||
fn get_info(&self) -> ClientInfo {
|
||||
let capabilities = ClientCapabilities::builder().enable_elicitation().build();
|
||||
ClientInfo {
|
||||
capabilities,
|
||||
..Default::default()
|
||||
}
|
||||
}
|
||||
|
||||
fn create_elicitation(
|
||||
&self,
|
||||
request: CreateElicitationRequestParams,
|
||||
_context: rmcp::service::RequestContext<RoleClient>,
|
||||
) -> impl std::future::Future<Output = Result<CreateElicitationResult, McpError>> + Send + '_
|
||||
{
|
||||
self.elicitation_requests
|
||||
.lock()
|
||||
.unwrap()
|
||||
.push(request.clone());
|
||||
|
||||
let message = match &request {
|
||||
CreateElicitationRequestParams::FormElicitationParams { message, .. }
|
||||
| CreateElicitationRequestParams::UrlElicitationParams { message, .. } => message,
|
||||
};
|
||||
let accept = self.elicitations_to_accept.contains(message);
|
||||
async move {
|
||||
if accept {
|
||||
Ok(CreateElicitationResult {
|
||||
action: ElicitationAction::Accept,
|
||||
content: Some(json!({ "approve": true })),
|
||||
})
|
||||
} else {
|
||||
Ok(CreateElicitationResult {
|
||||
action: ElicitationAction::Decline,
|
||||
content: None,
|
||||
})
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -1,240 +0,0 @@
|
||||
#![allow(clippy::unwrap_used, clippy::expect_used)]
|
||||
use std::borrow::Cow;
|
||||
use std::path::PathBuf;
|
||||
use std::sync::Arc;
|
||||
use std::sync::Mutex;
|
||||
|
||||
use anyhow::Context;
|
||||
use anyhow::Result;
|
||||
use anyhow::ensure;
|
||||
use codex_exec_server::ExecResult;
|
||||
use exec_server_test_support::InteractiveClient;
|
||||
use exec_server_test_support::create_transport;
|
||||
use exec_server_test_support::create_transport_with_shell_path;
|
||||
use exec_server_test_support::notify_readable_sandbox;
|
||||
use exec_server_test_support::write_default_execpolicy;
|
||||
use maplit::hashset;
|
||||
use pretty_assertions::assert_eq;
|
||||
use rmcp::ServiceExt;
|
||||
use rmcp::model::CallToolRequestParams;
|
||||
use rmcp::model::CallToolResult;
|
||||
use rmcp::model::CreateElicitationRequestParams;
|
||||
use rmcp::model::EmptyResult;
|
||||
use rmcp::model::ServerResult;
|
||||
use rmcp::model::object;
|
||||
use serde_json::json;
|
||||
use std::os::unix::fs::PermissionsExt;
|
||||
use std::os::unix::fs::symlink;
|
||||
use tempfile::TempDir;
|
||||
use tokio::process::Command;
|
||||
|
||||
const USE_LOGIN_SHELL: bool = false;
|
||||
|
||||
/// Verify that when using a read-only sandbox and an execpolicy that prompts,
|
||||
/// the proper elicitation is sent. Upon auto-approving the elicitation, the
|
||||
/// command should be run privileged outside the sandbox.
|
||||
#[tokio::test(flavor = "current_thread")]
|
||||
async fn accept_elicitation_for_prompt_rule() -> Result<()> {
|
||||
// Configure a stdio transport that will launch the MCP server using
|
||||
// $CODEX_HOME with an execpolicy that prompts for `git init` commands.
|
||||
let codex_home = TempDir::new()?;
|
||||
write_default_execpolicy(
|
||||
r#"
|
||||
# Create a rule with `decision = "prompt"` to exercise the elicitation flow.
|
||||
prefix_rule(
|
||||
pattern = ["git", "init"],
|
||||
decision = "prompt",
|
||||
match = [
|
||||
"git init ."
|
||||
],
|
||||
)
|
||||
"#,
|
||||
codex_home.as_ref(),
|
||||
)
|
||||
.await?;
|
||||
let dotslash_cache_temp_dir = TempDir::new()?;
|
||||
let dotslash_cache = dotslash_cache_temp_dir.path();
|
||||
let transport = create_transport(codex_home.as_ref(), dotslash_cache).await?;
|
||||
run_accept_elicitation_for_prompt_rule_with_transport(transport).await
|
||||
}
|
||||
|
||||
/// Verify the same prompt/escalation flow works when the server is launched
|
||||
/// with a patched zsh binary.
|
||||
///
|
||||
/// The suite resolves `tests/suite/zsh` via DotSlash on first use.
|
||||
#[tokio::test(flavor = "current_thread")]
|
||||
async fn accept_elicitation_for_prompt_rule_with_zsh() -> Result<()> {
|
||||
let codex_home = TempDir::new()?;
|
||||
write_default_execpolicy(
|
||||
r#"
|
||||
# Create a rule with `decision = "prompt"` to exercise the elicitation flow.
|
||||
prefix_rule(
|
||||
pattern = ["git", "init"],
|
||||
decision = "prompt",
|
||||
match = [
|
||||
"git init ."
|
||||
],
|
||||
)
|
||||
"#,
|
||||
codex_home.as_ref(),
|
||||
)
|
||||
.await?;
|
||||
let dotslash_cache_temp_dir = TempDir::new()?;
|
||||
let dotslash_cache = dotslash_cache_temp_dir.path();
|
||||
let zsh_path = resolve_test_zsh_path(dotslash_cache).await?;
|
||||
eprintln!(
|
||||
"using zsh path for exec-server test: {}",
|
||||
zsh_path.display()
|
||||
);
|
||||
let transport =
|
||||
create_transport_with_shell_path(codex_home.as_ref(), dotslash_cache, &zsh_path).await?;
|
||||
run_accept_elicitation_for_prompt_rule_with_transport(transport).await
|
||||
}
|
||||
|
||||
async fn run_accept_elicitation_for_prompt_rule_with_transport(
|
||||
transport: rmcp::transport::TokioChildProcess,
|
||||
) -> Result<()> {
|
||||
// Create an MCP client that approves the expected elicitation message.
|
||||
let project_root = TempDir::new()?;
|
||||
let project_root_path = project_root.path().canonicalize().unwrap();
|
||||
let git_path = resolve_git_path(USE_LOGIN_SHELL).await?;
|
||||
let git_init_command = format!("{git_path} init --quiet .");
|
||||
let expected_elicitation_message = format!(
|
||||
"Allow agent to run `{git_path} init --quiet .` in `{}`?",
|
||||
project_root_path.display()
|
||||
);
|
||||
let elicitation_requests: Arc<Mutex<Vec<CreateElicitationRequestParams>>> = Default::default();
|
||||
let client = InteractiveClient {
|
||||
elicitations_to_accept: hashset! { expected_elicitation_message.clone() },
|
||||
elicitation_requests: elicitation_requests.clone(),
|
||||
};
|
||||
|
||||
// Start the MCP server.
|
||||
let service: rmcp::service::RunningService<rmcp::RoleClient, InteractiveClient> =
|
||||
client.serve(transport).await?;
|
||||
|
||||
// Notify the MCP server about the current sandbox state before making any
|
||||
// `shell` tool calls.
|
||||
let linux_sandbox_exe_folder = TempDir::new()?;
|
||||
let codex_linux_sandbox_exe = if cfg!(target_os = "linux") {
|
||||
let codex_linux_sandbox_exe = linux_sandbox_exe_folder.path().join("codex-linux-sandbox");
|
||||
let codex_cli = ensure_codex_cli()?;
|
||||
symlink(&codex_cli, &codex_linux_sandbox_exe)?;
|
||||
Some(codex_linux_sandbox_exe)
|
||||
} else {
|
||||
None
|
||||
};
|
||||
let response =
|
||||
notify_readable_sandbox(&project_root_path, codex_linux_sandbox_exe, &service).await?;
|
||||
let ServerResult::EmptyResult(EmptyResult {}) = response else {
|
||||
panic!("expected EmptyResult from sandbox state notification but found: {response:?}");
|
||||
};
|
||||
|
||||
// Call the shell tool and verify that an elicitation was created and
|
||||
// auto-approved.
|
||||
let CallToolResult {
|
||||
content, is_error, ..
|
||||
} = service
|
||||
.call_tool(CallToolRequestParams {
|
||||
meta: None,
|
||||
name: Cow::Borrowed("shell"),
|
||||
arguments: Some(object(json!(
|
||||
{
|
||||
"login": USE_LOGIN_SHELL,
|
||||
"command": git_init_command,
|
||||
"workdir": project_root_path.to_string_lossy(),
|
||||
}
|
||||
))),
|
||||
task: None,
|
||||
})
|
||||
.await?;
|
||||
let tool_call_content = content
|
||||
.first()
|
||||
.expect("expected non-empty content")
|
||||
.as_text()
|
||||
.expect("expected text content");
|
||||
let ExecResult {
|
||||
exit_code, output, ..
|
||||
} = serde_json::from_str::<ExecResult>(&tool_call_content.text)?;
|
||||
// `git init --quiet` is expected to suppress the usual initialization
|
||||
// banner, so assert on success and filesystem effects instead of output.
|
||||
assert!(
|
||||
output.is_empty(),
|
||||
"expected no output from `git init --quiet .`, got `{output}`"
|
||||
);
|
||||
assert_eq!(exit_code, 0, "command should succeed");
|
||||
assert_eq!(is_error, Some(false), "command should succeed");
|
||||
assert!(
|
||||
project_root_path.join(".git").is_dir(),
|
||||
"git repo should exist"
|
||||
);
|
||||
|
||||
let elicitation_messages = elicitation_requests
|
||||
.lock()
|
||||
.unwrap()
|
||||
.iter()
|
||||
.map(|r| match r {
|
||||
rmcp::model::CreateElicitationRequestParams::FormElicitationParams {
|
||||
message, ..
|
||||
}
|
||||
| rmcp::model::CreateElicitationRequestParams::UrlElicitationParams {
|
||||
message, ..
|
||||
} => message.clone(),
|
||||
})
|
||||
.collect::<Vec<_>>();
|
||||
assert_eq!(vec![expected_elicitation_message], elicitation_messages);
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
async fn resolve_test_zsh_path(dotslash_cache: &std::path::Path) -> Result<PathBuf> {
|
||||
let dotslash_zsh = codex_utils_cargo_bin::find_resource!("tests/suite/zsh")?;
|
||||
core_test_support::fetch_dotslash_file(&dotslash_zsh, Some(dotslash_cache))
|
||||
.with_context(|| format!("failed to fetch test zsh from {}", dotslash_zsh.display()))
|
||||
}
|
||||
|
||||
fn ensure_codex_cli() -> Result<PathBuf> {
|
||||
let codex_cli = codex_utils_cargo_bin::cargo_bin("codex")?;
|
||||
|
||||
let metadata = codex_cli.metadata().with_context(|| {
|
||||
format!(
|
||||
"failed to read metadata for codex binary at {}",
|
||||
codex_cli.display()
|
||||
)
|
||||
})?;
|
||||
ensure!(
|
||||
metadata.is_file(),
|
||||
"expected codex binary at {} to be a file; run `cargo build -p codex-cli --bin codex` before this test",
|
||||
codex_cli.display()
|
||||
);
|
||||
|
||||
let mode = metadata.permissions().mode();
|
||||
ensure!(
|
||||
mode & 0o111 != 0,
|
||||
"codex binary at {} is not executable (mode {mode:o}); run `cargo build -p codex-cli --bin codex` before this test",
|
||||
codex_cli.display()
|
||||
);
|
||||
|
||||
Ok(codex_cli)
|
||||
}
|
||||
|
||||
async fn resolve_git_path(use_login_shell: bool) -> Result<String> {
|
||||
let bash_flag = if use_login_shell { "-lc" } else { "-c" };
|
||||
let git = Command::new("bash")
|
||||
.arg(bash_flag)
|
||||
.arg("command -v git")
|
||||
.output()
|
||||
.await
|
||||
.context("failed to resolve git via login shell")?;
|
||||
ensure!(
|
||||
git.status.success(),
|
||||
"failed to resolve git via login shell: {}",
|
||||
String::from_utf8_lossy(&git.stderr)
|
||||
);
|
||||
let git_path = String::from_utf8(git.stdout)
|
||||
.context("git path was not valid utf8")?
|
||||
.trim()
|
||||
.to_string();
|
||||
ensure!(!git_path.is_empty(), "git path should not be empty");
|
||||
Ok(git_path)
|
||||
}
|
||||
@@ -1,79 +0,0 @@
|
||||
#![allow(clippy::unwrap_used, clippy::expect_used)]
|
||||
use std::borrow::Cow;
|
||||
use std::fs;
|
||||
use std::sync::Arc;
|
||||
|
||||
use anyhow::Result;
|
||||
use exec_server_test_support::create_transport;
|
||||
use pretty_assertions::assert_eq;
|
||||
use rmcp::ServiceExt;
|
||||
use rmcp::model::Tool;
|
||||
use rmcp::model::object;
|
||||
use serde_json::json;
|
||||
use tempfile::TempDir;
|
||||
|
||||
/// Verify the list_tools call to the MCP server returns the expected response.
|
||||
#[tokio::test(flavor = "current_thread")]
|
||||
async fn list_tools() -> Result<()> {
|
||||
let codex_home = TempDir::new()?;
|
||||
let policy_dir = codex_home.path().join("rules");
|
||||
fs::create_dir_all(&policy_dir)?;
|
||||
fs::write(
|
||||
policy_dir.join("default.rules"),
|
||||
r#"prefix_rule(pattern=["ls"], decision="prompt")"#,
|
||||
)?;
|
||||
let dotslash_cache_temp_dir = TempDir::new()?;
|
||||
let dotslash_cache = dotslash_cache_temp_dir.path();
|
||||
let transport = create_transport(codex_home.path(), dotslash_cache).await?;
|
||||
|
||||
let service = ().serve(transport).await?;
|
||||
let tools = service.list_tools(Default::default()).await?.tools;
|
||||
assert_eq!(
|
||||
vec![Tool {
|
||||
name: Cow::Borrowed("shell"),
|
||||
title: None,
|
||||
description: Some(Cow::Borrowed(
|
||||
"Runs a shell command and returns its output. You MUST provide the workdir as an absolute path."
|
||||
)),
|
||||
input_schema: Arc::new(object(json!({
|
||||
"$schema": "https://json-schema.org/draft/2020-12/schema",
|
||||
"properties": {
|
||||
"command": {
|
||||
"description": "The bash string to execute.",
|
||||
"type": "string",
|
||||
},
|
||||
"login": {
|
||||
"description": "Launch Bash with -lc instead of -c: defaults to true.",
|
||||
"nullable": true,
|
||||
"type": "boolean",
|
||||
},
|
||||
"timeout_ms": {
|
||||
"description": "The timeout for the command in milliseconds.",
|
||||
"format": "uint64",
|
||||
"minimum": 0,
|
||||
"nullable": true,
|
||||
"type": "integer",
|
||||
},
|
||||
"workdir": {
|
||||
"description": "The working directory to execute the command in. Must be an absolute path.",
|
||||
"type": "string",
|
||||
},
|
||||
},
|
||||
"required": [
|
||||
"command",
|
||||
"workdir",
|
||||
],
|
||||
"title": "ExecParams",
|
||||
"type": "object",
|
||||
}))),
|
||||
output_schema: None,
|
||||
execution: None,
|
||||
annotations: None,
|
||||
icons: None,
|
||||
meta: None
|
||||
}],
|
||||
tools
|
||||
);
|
||||
|
||||
Ok(())
|
||||
}
|
||||
@@ -1,4 +0,0 @@
|
||||
#[cfg(any(all(target_os = "macos", target_arch = "aarch64"), target_os = "linux"))]
|
||||
mod accept_elicitation;
|
||||
#[cfg(any(all(target_os = "macos", target_arch = "aarch64"), target_os = "linux"))]
|
||||
mod list_tools;
|
||||
@@ -4,9 +4,14 @@ version.workspace = true
|
||||
edition.workspace = true
|
||||
license.workspace = true
|
||||
|
||||
[[bin]]
|
||||
name = "codex-execve-wrapper"
|
||||
path = "src/bin/main_execve_wrapper.rs"
|
||||
|
||||
[dependencies]
|
||||
anyhow = { workspace = true }
|
||||
async-trait = { workspace = true }
|
||||
clap = { workspace = true, features = ["derive"] }
|
||||
codex-core = { workspace = true }
|
||||
codex-execpolicy = { workspace = true }
|
||||
codex-protocol = { workspace = true }
|
||||
@@ -24,6 +29,7 @@ tokio = { workspace = true, features = [
|
||||
] }
|
||||
tokio-util = { workspace = true }
|
||||
tracing = { workspace = true }
|
||||
tracing-subscriber = { workspace = true, features = ["env-filter", "fmt"] }
|
||||
|
||||
[dev-dependencies]
|
||||
pretty_assertions = { workspace = true }
|
||||
|
||||
28
codex-rs/shell-escalation/README.md
Normal file
28
codex-rs/shell-escalation/README.md
Normal file
@@ -0,0 +1,28 @@
|
||||
# codex-shell-escalation
|
||||
|
||||
This crate contains the Unix shell-escalation protocol implementation and the
|
||||
`codex-execve-wrapper` executable.
|
||||
|
||||
`codex-execve-wrapper` receives the arguments to an intercepted `execve(2)` call and delegates the
|
||||
decision to the shell-escalation protocol over a shared file descriptor (specified by the
|
||||
`CODEX_ESCALATE_SOCKET` environment variable). The server on the other side replies with one of:
|
||||
|
||||
- `Run`: `codex-execve-wrapper` should invoke `execve(2)` on itself to run the original command
|
||||
within the sandboxed shell.
|
||||
- `Escalate`: forward the file descriptors of the current process so the command can be run
|
||||
faithfully outside the sandbox. When the process completes, the server forwards the exit code
|
||||
back to `codex-execve-wrapper`.
|
||||
- `Deny`: the server has declared the proposed command to be forbidden, so
|
||||
`codex-execve-wrapper` prints an error to `stderr` and exits with `1`.
|
||||
|
||||
## Patched Bash
|
||||
|
||||
We carry a small patch to `execute_cmd.c` (see `patches/bash-exec-wrapper.patch`) that adds support for `EXEC_WRAPPER`. The original commit message is “add support for BASH_EXEC_WRAPPER” and the patch applies cleanly to `a8a1c2fac029404d3f42cd39f5a20f24b6e4fe4b` from https://github.com/bminor/bash. To rebuild manually:
|
||||
|
||||
```bash
|
||||
git clone https://github.com/bminor/bash && cd bash
|
||||
git checkout a8a1c2fac029404d3f42cd39f5a20f24b6e4fe4b
|
||||
git apply /path/to/patches/bash-exec-wrapper.patch
|
||||
./configure --without-bash-malloc
|
||||
make -j"$(nproc)"
|
||||
```
|
||||
@@ -5,4 +5,4 @@ fn main() {
|
||||
}
|
||||
|
||||
#[cfg(unix)]
|
||||
pub use codex_exec_server::main_execve_wrapper as main;
|
||||
pub use codex_shell_escalation::main_execve_wrapper as main;
|
||||
@@ -1,21 +1,21 @@
|
||||
#[cfg(unix)]
|
||||
mod unix {
|
||||
mod escalate_client;
|
||||
mod escalate_protocol;
|
||||
mod escalate_server;
|
||||
mod escalation_policy;
|
||||
mod socket;
|
||||
mod stopwatch;
|
||||
|
||||
pub use self::escalate_client::run;
|
||||
pub use self::escalate_protocol::EscalateAction;
|
||||
pub use self::escalate_server::EscalationPolicyFactory;
|
||||
pub use self::escalate_server::ExecParams;
|
||||
pub use self::escalate_server::ExecResult;
|
||||
pub use self::escalate_server::run_escalate_server;
|
||||
pub use self::escalation_policy::EscalationPolicy;
|
||||
pub use self::stopwatch::Stopwatch;
|
||||
}
|
||||
mod unix;
|
||||
|
||||
#[cfg(unix)]
|
||||
pub use unix::*;
|
||||
pub use unix::EscalateAction;
|
||||
#[cfg(unix)]
|
||||
pub use unix::EscalationPolicy;
|
||||
#[cfg(unix)]
|
||||
pub use unix::EscalationPolicyFactory;
|
||||
#[cfg(unix)]
|
||||
pub use unix::ExecParams;
|
||||
#[cfg(unix)]
|
||||
pub use unix::ExecResult;
|
||||
#[cfg(unix)]
|
||||
pub use unix::Stopwatch;
|
||||
#[cfg(unix)]
|
||||
pub use unix::main_execve_wrapper;
|
||||
#[cfg(unix)]
|
||||
pub use unix::run;
|
||||
#[cfg(unix)]
|
||||
pub use unix::run_escalate_server;
|
||||
|
||||
@@ -3,11 +3,11 @@ use std::path::Path;
|
||||
use std::sync::Arc;
|
||||
use tokio::sync::RwLock;
|
||||
|
||||
use crate::escalate_protocol::EscalateAction;
|
||||
use crate::escalation_policy::EscalationPolicy;
|
||||
use crate::stopwatch::Stopwatch;
|
||||
use crate::unix::escalate_server::EscalationPolicyFactory;
|
||||
use codex_execpolicy::Policy;
|
||||
use super::escalate_protocol::EscalateAction;
|
||||
use super::escalate_server::EscalationPolicyFactory;
|
||||
use super::escalation_policy::EscalationPolicy;
|
||||
use super::stopwatch::Stopwatch;
|
||||
|
||||
#[async_trait]
|
||||
pub trait ShellActionProvider: Send + Sync {
|
||||
|
||||
@@ -5,7 +5,7 @@ use std::path::PathBuf;
|
||||
use serde::Deserialize;
|
||||
use serde::Serialize;
|
||||
|
||||
/// 'exec-server escalate' reads this to find the inherited FD for the escalate socket.
|
||||
/// Exec wrappers read this to find the inherited FD for the escalation socket.
|
||||
pub const ESCALATE_SOCKET_ENV_VAR: &str = "CODEX_ESCALATE_SOCKET";
|
||||
|
||||
/// Patched shells use this to wrap exec() calls.
|
||||
|
||||
25
codex-rs/shell-escalation/src/unix/execve_wrapper.rs
Normal file
25
codex-rs/shell-escalation/src/unix/execve_wrapper.rs
Normal file
@@ -0,0 +1,25 @@
|
||||
//! Entrypoints for execve interception helper binaries.
|
||||
|
||||
use clap::Parser;
|
||||
use tracing_subscriber::EnvFilter;
|
||||
|
||||
// Command-line arguments parsed by `main_execve_wrapper`.
// Plain `//` comments are used here on purpose: clap's derive macro turns `///`
// doc comments into help text, which would change the binary's `--help` output.
#[derive(Parser)]
|
||||
pub struct ExecveWrapperCli {
|
||||
// First positional argument; passed unchanged as the first argument of `crate::run`.
// NOTE(review): presumably the path of the program the shell attempted to exec — confirm.
file: String,
|
||||
|
||||
// Remaining positional arguments, collected with clap's `trailing_var_arg`
// setting and passed unchanged as the second argument of `crate::run`.
// NOTE(review): presumably the argv of the intercepted exec call — confirm.
#[arg(trailing_var_arg = true)]
|
||||
argv: Vec<String>,
|
||||
}
|
||||
|
||||
/// Entry point for the execve wrapper binary.
///
/// Installs a `tracing` subscriber, parses the command line into
/// [`ExecveWrapperCli`], awaits `crate::run(file, argv)`, and terminates the
/// process with the exit code that call returns.
///
/// # Errors
///
/// Returns the error from `crate::run` if it fails. On success this function
/// does not return, because `std::process::exit` ends the process first.
#[tokio::main]
|
||||
pub async fn main_execve_wrapper() -> anyhow::Result<()> {
|
||||
// Log configuration: filter taken from the environment, output written to
// stderr, ANSI escape sequences disabled.
tracing_subscriber::fmt()
|
||||
.with_env_filter(EnvFilter::from_default_env())
|
||||
.with_writer(std::io::stderr)
|
||||
.with_ansi(false)
|
||||
.init();
|
||||
|
||||
// Destructure the parsed CLI so both fields can be moved into `crate::run`.
let ExecveWrapperCli { file, argv } = ExecveWrapperCli::parse();
|
||||
let exit_code = crate::run(file, argv).await?;
|
||||
// Exit with the code produced by `crate::run`; nothing after this line runs.
std::process::exit(exit_code);
|
||||
}
|
||||
@@ -1,7 +1,72 @@
|
||||
//! Unix shell-escalation protocol implementation.
|
||||
//!
|
||||
//! A patched shell invokes an exec wrapper on every `exec()` attempt. The wrapper sends an
|
||||
//! `EscalateRequest` over the inherited `CODEX_ESCALATE_SOCKET`, and the server decides whether to
|
||||
//! run the command directly (`Run`) or execute it on the server side (`Escalate`).
|
||||
//!
|
||||
//! Of key importance is that the `EscalateRequest` includes a file descriptor for a socket
|
||||
//! that the server can use to send the response to the execve wrapper. In this
|
||||
//! way, all descendants of the Server process can use the file descriptor
|
||||
//! specified by the `CODEX_ESCALATE_SOCKET` environment variable to _send_ escalation requests,
|
||||
//! but responses are read from a separate socket that is created for each request, which
|
||||
//! allows the server to handle multiple concurrent escalation requests.
|
||||
//!
|
||||
//! ### Escalation flow
|
||||
//!
|
||||
//! Command Server Shell  Execve Wrapper
|
||||
//!           |
|
||||
//!           o----->o
|
||||
//!           |      |
|
||||
//!           |      o--(exec)-->o
|
||||
//!           |      |           |
|
||||
//!           |o<-(EscalateReq)--o
|
||||
//!           ||     |           |
|
||||
//!           |o--(Escalate)---->o
|
||||
//!           ||     |           |
|
||||
//!           |o<---------(fds)--o
|
||||
//!           ||     |           |
|
||||
//!    o<------o     |           |
|
||||
//!    |      ||     |           |
|
||||
//!    x------>o     |           |
|
||||
//!           ||     |           |
|
||||
//!           |x--(exit code)--->o
|
||||
//!           |      |           |
|
||||
//!           |      o<--(exit)--x
|
||||
//!           |      |
|
||||
//!           o<-----x
|
||||
//!
|
||||
//! ### Non-escalation flow
|
||||
//!
|
||||
//! Server  Shell  Execve Wrapper  Command
|
||||
//!    |
|
||||
//!    o----->o
|
||||
//!    |      |
|
||||
//!    |      o--(exec)-->o
|
||||
//!    |      |           |
|
||||
//!    |o<-(EscalateReq)--o
|
||||
//!    ||     |           |
|
||||
//!    |o-(Run)---------->o
|
||||
//!    |      |           |
|
||||
//!    |      |           x--(exec)-->o
|
||||
//!    |      |                       |
|
||||
//!    |      o<--------------(exit)--x
|
||||
//!    |      |
|
||||
//!    o<-----x
|
||||
//!
|
||||
pub mod escalate_client;
|
||||
pub mod escalate_protocol;
|
||||
pub mod escalate_server;
|
||||
pub mod escalation_policy;
|
||||
pub mod execve_wrapper;
|
||||
pub mod socket;
|
||||
pub mod core_shell_escalation;
|
||||
pub mod stopwatch;
|
||||
|
||||
pub use self::escalate_client::run;
|
||||
pub use self::escalate_protocol::EscalateAction;
|
||||
pub use self::escalate_server::EscalationPolicyFactory;
|
||||
pub use self::escalate_server::ExecParams;
|
||||
pub use self::escalate_server::ExecResult;
|
||||
pub use self::escalate_server::run_escalate_server;
|
||||
pub use self::escalation_policy::EscalationPolicy;
|
||||
pub use self::execve_wrapper::main_execve_wrapper;
|
||||
pub use self::stopwatch::Stopwatch;
|
||||
|
||||
@@ -96,10 +96,11 @@ The Codex harness (used by the CLI and the VS Code extension) sends such request
|
||||
|
||||
## Package Contents
|
||||
|
||||
This package wraps the `codex-exec-mcp-server` binary and its helpers so that the shell MCP can be invoked via `npx -y @openai/codex-shell-tool-mcp`. It bundles:
|
||||
This package currently publishes shell binaries only. It bundles:
|
||||
|
||||
- `codex-exec-mcp-server` and `codex-execve-wrapper` built for macOS (arm64, x64) and Linux (musl arm64, musl x64).
|
||||
- A patched Bash that honors `EXEC_WRAPPER`, built for multiple glibc baselines (Ubuntu 24.04/22.04/20.04, Debian 12/11, CentOS-like 9) and macOS (15/14/13).
|
||||
- A launcher (`bin/mcp-server.js`) that picks the correct binaries for the current `process.platform` / `process.arch`, specifying `--execve` and `--bash` for the MCP, as appropriate.
|
||||
- A patched zsh with `EXEC_WRAPPER` support for the same supported target triples.
|
||||
|
||||
See [the README in the Codex repo](https://github.com/openai/codex/blob/main/codex-rs/exec-server/README.md) for details.
|
||||
It does not currently include the Rust MCP server binaries.
|
||||
|
||||
See [the README in the Codex repo](https://github.com/openai/codex/blob/main/codex-rs/shell-escalation/README.md) for details.
|
||||
|
||||
@@ -1,16 +1,12 @@
|
||||
{
|
||||
"name": "@openai/codex-shell-tool-mcp",
|
||||
"version": "0.0.0-dev",
|
||||
"description": "Codex MCP server for the shell tool with patched Bash and exec wrappers.",
|
||||
"description": "Patched Bash and Zsh binaries for Codex shell execution.",
|
||||
"license": "Apache-2.0",
|
||||
"bin": {
|
||||
"codex-shell-tool-mcp": "bin/mcp-server.js"
|
||||
},
|
||||
"engines": {
|
||||
"node": ">=18"
|
||||
},
|
||||
"files": [
|
||||
"bin",
|
||||
"vendor",
|
||||
"README.md"
|
||||
],
|
||||
|
||||
@@ -1,7 +1,5 @@
|
||||
// Launches the codex-exec-mcp-server binary bundled in this package.
|
||||
// Reports the path to the appropriate Bash binary bundled in this package.
|
||||
|
||||
import { spawn } from "node:child_process";
|
||||
import { accessSync, constants } from "node:fs";
|
||||
import os from "node:os";
|
||||
import path from "node:path";
|
||||
import { resolveBashPath } from "./bashSelection";
|
||||
@@ -12,8 +10,6 @@ async function main(): Promise<void> {
|
||||
const targetTriple = resolveTargetTriple(process.platform, process.arch);
|
||||
const vendorRoot = path.resolve(__dirname, "..", "vendor");
|
||||
const targetRoot = path.join(vendorRoot, targetTriple);
|
||||
const execveWrapperPath = path.join(targetRoot, "codex-execve-wrapper");
|
||||
const serverPath = path.join(targetRoot, "codex-exec-mcp-server");
|
||||
|
||||
const osInfo = process.platform === "linux" ? readOsRelease() : null;
|
||||
const { path: bashPath } = resolveBashPath(
|
||||
@@ -23,70 +19,7 @@ async function main(): Promise<void> {
|
||||
osInfo,
|
||||
);
|
||||
|
||||
[execveWrapperPath, serverPath, bashPath].forEach((checkPath) => {
|
||||
try {
|
||||
accessSync(checkPath, constants.F_OK);
|
||||
} catch {
|
||||
throw new Error(`Required binary missing: ${checkPath}`);
|
||||
}
|
||||
});
|
||||
|
||||
const args = [
|
||||
"--execve",
|
||||
execveWrapperPath,
|
||||
"--bash",
|
||||
bashPath,
|
||||
...process.argv.slice(2),
|
||||
];
|
||||
const child = spawn(serverPath, args, {
|
||||
stdio: "inherit",
|
||||
});
|
||||
|
||||
const forwardSignal = (signal: NodeJS.Signals) => {
|
||||
if (child.killed) {
|
||||
return;
|
||||
}
|
||||
try {
|
||||
child.kill(signal);
|
||||
} catch {
|
||||
/* ignore */
|
||||
}
|
||||
};
|
||||
|
||||
(["SIGINT", "SIGTERM", "SIGHUP"] as const).forEach((sig) => {
|
||||
process.on(sig, () => forwardSignal(sig));
|
||||
});
|
||||
|
||||
child.on("error", (err) => {
|
||||
// eslint-disable-next-line no-console
|
||||
console.error(err);
|
||||
process.exit(1);
|
||||
});
|
||||
|
||||
const childResult = await new Promise<
|
||||
| { type: "signal"; signal: NodeJS.Signals }
|
||||
| { type: "code"; exitCode: number }
|
||||
>((resolve) => {
|
||||
child.on("exit", (code, signal) => {
|
||||
if (signal) {
|
||||
resolve({ type: "signal", signal });
|
||||
} else {
|
||||
resolve({ type: "code", exitCode: code ?? 1 });
|
||||
}
|
||||
});
|
||||
});
|
||||
|
||||
if (childResult.type === "signal") {
|
||||
// This environment running under `node --test` may not allow rethrowing a signal.
|
||||
// Wrap in a try to avoid masking the original termination reason.
|
||||
try {
|
||||
process.kill(process.pid, childResult.signal);
|
||||
} catch {
|
||||
process.exit(1);
|
||||
}
|
||||
} else {
|
||||
process.exit(childResult.exitCode);
|
||||
}
|
||||
console.log(`Platform Bash is: ${bashPath}`);
|
||||
}
|
||||
|
||||
void main().catch((err) => {
|
||||
|
||||
Reference in New Issue
Block a user