feat: declare server capability in shell-tool-mcp (#7112)

This introduces a new feature to Codex when it operates as an MCP
_client_: if an MCP _server_ replies that it has an entry named
`"codex/sandbox-state"` in its _server capabilities_, then Codex will
send it an MCP notification with the following structure:

```json
{
  "method": "codex/sandbox-state/update",
  "params": {
    "sandboxPolicy": {
      "type": "workspace-write",
      "network-access": false,
      "exclude-tmpdir-env-var": false,
      "exclude-slash-tmp": false
    },
    "codexLinuxSandboxExe": null,
    "sandboxCwd": "/Users/mbolin/code/codex2"
  }
}
```

or with whatever values are appropriate for the initial `sandboxPolicy`.

**NOTE:** Codex _should_ continue to send the MCP server notifications
of the same format if these things change over the lifetime of the
thread, but that isn't wired up yet.

The result is that `shell-tool-mcp` can consume these values so that
when it calls `codex_core::exec::process_exec_tool_call()` in
`codex-rs/exec-server/src/posix/escalate_server.rs`, it is now sure to
call it with the correct values (whereas previously we relied on
hardcoded values).

While I would argue this is a supported use case within the MCP
protocol, the `rmcp` crate that we are using today does not support
custom notifications. As such, I had to patch it and I submitted it for
review, so hopefully it will be accepted in some form:

https://github.com/modelcontextprotocol/rust-sdk/pull/556

To test out this change from end-to-end:

- I ran `cargo build` in `~/code/codex2/codex-rs/exec-server`
- I built the fork of Bash in `~/code/bash/bash`
- I added the following to my `~/.codex/config.toml`:

```toml
# Use with `codex --disable shell_tool`.
[mcp_servers.execshell]
args = ["--bash", "/Users/mbolin/code/bash/bash"]
command = "/Users/mbolin/code/codex2/codex-rs/target/debug/codex-exec-mcp-server"
```

- From `~/code/codex2/codex-rs`, I ran `just codex --disable shell_tool`
- When the TUI started up, I verified that the sandbox mode is
`workspace-write`
- I ran `/mcp` to verify that the shell tool from the MCP is there:

<img width="1387" height="1400" alt="image"
src="https://github.com/user-attachments/assets/1a8addcc-5005-4e16-b59f-95cfd06fd4ab"
/>

- Then I asked it:

> what is the output of `gh issue list`

because this should be auto-approved with our existing dummy policy:


`codex-rs/exec-server/src/posix.rs`, lines 157-164, at commit `af63e6eccc`

And it worked:

<img width="1387" height="1400" alt="image"
src="https://github.com/user-attachments/assets/7568d2f7-80da-4d68-86d0-c265a6f5e6c1"
/>
This commit is contained in:
Michael Bolin
2025-11-21 16:11:01 -08:00
committed by GitHub
parent af63e6eccc
commit c6f68c9df8
8 changed files with 190 additions and 27 deletions

View File

@@ -1,8 +1,13 @@
use std::path::PathBuf;
use std::sync::Arc;
use std::time::Duration;
use anyhow::Context as _;
use anyhow::Result;
use codex_core::MCP_SANDBOX_STATE_CAPABILITY;
use codex_core::MCP_SANDBOX_STATE_NOTIFICATION;
use codex_core::SandboxState;
use codex_core::protocol::SandboxPolicy;
use rmcp::ErrorData as McpError;
use rmcp::RoleServer;
use rmcp::ServerHandler;
@@ -17,6 +22,8 @@ use rmcp::tool;
use rmcp::tool_handler;
use rmcp::tool_router;
use rmcp::transport::stdio;
use tokio::sync::RwLock;
use tracing::debug;
use crate::posix::escalate_server::EscalateServer;
use crate::posix::escalate_server::{self};
@@ -27,6 +34,8 @@ use crate::posix::stopwatch::Stopwatch;
/// Path to our patched bash.
const CODEX_BASH_PATH_ENV_VAR: &str = "CODEX_BASH_PATH";
const SANDBOX_STATE_CAPABILITY_VERSION: &str = "1.0.0";
pub(crate) fn get_bash_path() -> Result<PathBuf> {
std::env::var(CODEX_BASH_PATH_ENV_VAR)
.map(PathBuf::from)
@@ -70,6 +79,7 @@ pub struct ExecTool {
bash_path: PathBuf,
execve_wrapper: PathBuf,
policy: ExecPolicy,
sandbox_state: Arc<RwLock<Option<SandboxState>>>,
}
#[tool_router]
@@ -80,6 +90,7 @@ impl ExecTool {
bash_path,
execve_wrapper,
policy,
sandbox_state: Arc::new(RwLock::new(None)),
}
}
@@ -97,13 +108,24 @@ impl ExecTool {
);
let stopwatch = Stopwatch::new(effective_timeout);
let cancel_token = stopwatch.cancellation_token();
let sandbox_state =
self.sandbox_state
.read()
.await
.clone()
.unwrap_or_else(|| SandboxState {
sandbox_policy: SandboxPolicy::ReadOnly,
codex_linux_sandbox_exe: None,
sandbox_cwd: PathBuf::from(&params.workdir),
});
let escalate_server = EscalateServer::new(
self.bash_path.clone(),
self.execve_wrapper.clone(),
McpEscalationPolicy::new(self.policy, context, stopwatch.clone()),
);
let result = escalate_server
.exec(params, cancel_token)
.exec(params, cancel_token, &sandbox_state)
.await
.map_err(|e| McpError::internal_error(e.to_string(), None))?;
Ok(CallToolResult::success(vec![Content::json(
@@ -115,9 +137,22 @@ impl ExecTool {
#[tool_handler]
impl ServerHandler for ExecTool {
fn get_info(&self) -> ServerInfo {
let mut experimental_capabilities = ExperimentalCapabilities::new();
let mut sandbox_state_capability = JsonObject::new();
sandbox_state_capability.insert(
"version".to_string(),
serde_json::Value::String(SANDBOX_STATE_CAPABILITY_VERSION.to_string()),
);
experimental_capabilities.insert(
MCP_SANDBOX_STATE_CAPABILITY.to_string(),
sandbox_state_capability,
);
ServerInfo {
protocol_version: ProtocolVersion::V_2025_06_18,
capabilities: ServerCapabilities::builder().enable_tools().build(),
capabilities: ServerCapabilities::builder()
.enable_tools()
.enable_experimental_with(experimental_capabilities)
.build(),
server_info: Implementation::from_build_env(),
instructions: Some(
"This server provides a tool to execute shell commands and return their output."
@@ -133,6 +168,31 @@ impl ServerHandler for ExecTool {
) -> Result<InitializeResult, McpError> {
Ok(self.get_info())
}
/// Handles custom client notifications received by this MCP server.
///
/// Only a notification whose method equals `MCP_SANDBOX_STATE_NOTIFICATION`
/// and that carries `params` is acted on: the params are deserialized into a
/// `SandboxState`, which then replaces whatever was stored in
/// `self.sandbox_state`. Any other notification, including a matching one
/// that has no `params`, is ignored without logging.
///
/// If the params cannot be deserialized, the error is logged at `warn` level
/// and the stored sandbox state is left as it was.
async fn on_custom_notification(
&self,
notification: rmcp::model::CustomClientNotification,
// The notification context is not used by this handler.
_context: rmcp::service::NotificationContext<rmcp::RoleServer>,
) {
// Only the method name and params are needed; remaining fields are dropped.
let rmcp::model::CustomClientNotification { method, params, .. } = notification;
// Both conditions must hold: the method matches AND params are present.
if method == MCP_SANDBOX_STATE_NOTIFICATION
&& let Some(params) = params
{
match serde_json::from_value::<SandboxState>(params) {
Ok(sandbox_state) => {
// Log just the policy field of the received state.
debug!(
?sandbox_state.sandbox_policy,
"received sandbox state notification"
);
// The write lock is taken only after deserialization succeeded.
let mut state = self.sandbox_state.write().await;
*state = Some(sandbox_state);
}
Err(err) => {
// Malformed payload: report it and keep the existing state.
tracing::warn!(?err, "failed to deserialize sandbox state notification");
}
}
}
}
}
pub(crate) async fn serve(