feat: shell_command tool (#6510)

This adds support for a new variant of the shell tool behind a flag. To test, run `codex` with `--enable shell_command_tool`, which will register the tool with Codex under the name `shell_command` that accepts the following shape: ```python { command: str workdir: str | None, timeout_ms: int | None, with_escalated_permissions: bool | None, justification: str | None, } ``` This is comparable to the existing tool registered under `shell`/`container.exec`. The primary difference is that it accepts `command` as a `str` instead of a `str[]`. The `shell_command` tool executes by running `execvp(["bash", "-lc", command])`, though the exact arguments to `execvp(3)` depend on the user's default shell. The hypothesis is that this will simplify things for the model. For example, on Windows, instead of generating: ```json {"command": ["pwsh.exe", "-NoLogo", "-Command", "ls -Name"]} ``` The model could simply generate: ```json {"command": "ls -Name"} ``` As part of this change, I extracted some logic out of `user_shell.rs` as `Shell::derive_exec_args()` so that it can be reused in `codex-rs/core/src/tools/handlers/shell.rs`. Note the original code generated exec arg lists like: ```javascript ["bash", "-lc", command] ["zsh", "-lc", command] ["pwsh.exe", "-NoProfile", "-Command", command] ``` Using `-l` for Bash and Zsh, but then specifying `-NoProfile` for PowerShell seemed inconsistent to me, so I changed this in the new implementation while also adding a `use_login_shell: bool` option to make this explicit. If we decide to add a `login: bool` to `ShellCommandToolCallParams` like we have for unified exec: 807e2c27f0/codex-rs/core/src/tools/handlers/unified_exec.rs (L33-L34) Then this should make it straightforward to support.
2026-05-05 05:42:33 +03:00 · 2025-11-12 08:18:57 -08:00
parent 530db0ad73
commit 29364f3a9b
8 changed files with 262 additions and 29 deletions
--- a/codex-rs/core/src/tools/spec.rs
+++ b/codex-rs/core/src/tools/spec.rs
@@ -20,6 +20,8 @@ pub enum ConfigShellToolType {
    Default,
    Local,
    UnifiedExec,
+    /// Takes a command as a single string to be run in the user's default shell.
+    ShellCommand,
 }

 #[derive(Debug, Clone)]
@@ -48,6 +50,8 @@ impl ToolsConfig {

        let shell_type = if features.enabled(Feature::UnifiedExec) {
            ConfigShellToolType::UnifiedExec
+        } else if features.enabled(Feature::ShellCommandTool) {
+            ConfigShellToolType::ShellCommand
        } else {
            model_family.shell_type.clone()
        };
@@ -302,6 +306,53 @@ fn create_shell_tool() -> ToolSpec {
    })
 }

+fn create_shell_command_tool() -> ToolSpec {
+    let mut properties = BTreeMap::new();
+    properties.insert(
+        "command".to_string(),
+        JsonSchema::String {
+            description: Some(
+                "The shell script to execute in the user's default shell".to_string(),
+            ),
+        },
+    );
+    properties.insert(
+        "workdir".to_string(),
+        JsonSchema::String {
+            description: Some("The working directory to execute the command in".to_string()),
+        },
+    );
+    properties.insert(
+        "timeout_ms".to_string(),
+        JsonSchema::Number {
+            description: Some("The timeout for the command in milliseconds".to_string()),
+        },
+    );
+    properties.insert(
+        "with_escalated_permissions".to_string(),
+        JsonSchema::Boolean {
+            description: Some("Whether to request escalated permissions. Set to true if command needs to be run without sandbox restrictions".to_string()),
+        },
+    );
+    properties.insert(
+        "justification".to_string(),
+        JsonSchema::String {
+            description: Some("Only set if with_escalated_permissions is true. 1-sentence explanation of why we want to run this command.".to_string()),
+        },
+    );
+
+    ToolSpec::Function(ResponsesApiTool {
+        name: "shell_command".to_string(),
+        description: "Runs a shell command string and returns its output.".to_string(),
+        strict: false,
+        parameters: JsonSchema::Object {
+            properties,
+            required: Some(vec!["command".to_string()]),
+            additional_properties: Some(false.into()),
+        },
+    })
+}
+
 fn create_view_image_tool() -> ToolSpec {
    // Support only local filesystem path.
    let mut properties = BTreeMap::new();
@@ -891,6 +942,7 @@ pub(crate) fn build_specs(
    use crate::tools::handlers::McpResourceHandler;
    use crate::tools::handlers::PlanHandler;
    use crate::tools::handlers::ReadFileHandler;
+    use crate::tools::handlers::ShellCommandHandler;
    use crate::tools::handlers::ShellHandler;
    use crate::tools::handlers::TestSyncHandler;
    use crate::tools::handlers::UnifiedExecHandler;
@@ -906,6 +958,7 @@ pub(crate) fn build_specs(
    let view_image_handler = Arc::new(ViewImageHandler);
    let mcp_handler = Arc::new(McpHandler);
    let mcp_resource_handler = Arc::new(McpResourceHandler);
+    let shell_command_handler = Arc::new(ShellCommandHandler);

    match &config.shell_type {
        ConfigShellToolType::Default => {
@@ -920,12 +973,16 @@ pub(crate) fn build_specs(
            builder.register_handler("exec_command", unified_exec_handler.clone());
            builder.register_handler("write_stdin", unified_exec_handler);
        }
+        ConfigShellToolType::ShellCommand => {
+            builder.push_spec(create_shell_command_tool());
+        }
    }

    // Always register shell aliases so older prompts remain compatible.
    builder.register_handler("shell", shell_handler.clone());
    builder.register_handler("container.exec", shell_handler.clone());
    builder.register_handler("local_shell", shell_handler);
+    builder.register_handler("shell_command", shell_command_handler);

    builder.push_spec_with_parallel_support(create_list_mcp_resources_tool(), true);
    builder.push_spec_with_parallel_support(create_list_mcp_resource_templates_tool(), true);
@@ -1061,6 +1118,7 @@ mod tests {
            ConfigShellToolType::Default => Some("shell"),
            ConfigShellToolType::Local => Some("local_shell"),
            ConfigShellToolType::UnifiedExec => None,
+            ConfigShellToolType::ShellCommand => Some("shell_command"),
        }
    }

@@ -1293,6 +1351,22 @@ mod tests {
        assert_contains_tool_names(&tools, &subset);
    }

+    #[test]
+    fn test_build_specs_shell_command_present() {
+        assert_model_tools(
+            "codex-mini-latest",
+            Features::with_defaults().enable(Feature::ShellCommandTool),
+            &[
+                "shell_command",
+                "list_mcp_resources",
+                "list_mcp_resource_templates",
+                "read_mcp_resource",
+                "update_plan",
+                "view_image",
+            ],
+        );
+    }
+
    #[test]
    #[ignore]
    fn test_parallel_support_flags() {
@@ -1748,6 +1822,21 @@ mod tests {
        assert_eq!(description, expected);
    }

+    #[test]
+    fn test_shell_command_tool() {
+        let tool = super::create_shell_command_tool();
+        let ToolSpec::Function(ResponsesApiTool {
+            description, name, ..
+        }) = &tool
+        else {
+            panic!("expected function tool");
+        };
+        assert_eq!(name, "shell_command");
+
+        let expected = "Runs a shell command string and returns its output.";
+        assert_eq!(description, expected);
+    }
+
    #[test]
    fn test_get_openai_tools_mcp_tools_with_additional_properties_schema() {
        let model_family = find_family_for_model("gpt-5-codex")