mirror of
https://github.com/openai/codex.git
synced 2026-04-30 19:32:04 +03:00
feat: shell_command tool (#6510)
This adds support for a new variant of the shell tool behind a flag. To
test, run `codex` with `--enable shell_command_tool`, which will
register the tool with Codex under the name `shell_command` that accepts
the following shape:
```python
{
command: str
workdir: str | None,
timeout_ms: int | None,
with_escalated_permissions: bool | None,
justification: str | None,
}
```
This is comparable to the existing tool registered under
`shell`/`container.exec`. The primary difference is that it accepts
`command` as a `str` instead of a `str[]`. The `shell_command` tool
executes by running `execvp(["bash", "-lc", command])`, though the exact
arguments to `execvp(3)` depend on the user's default shell.
The hypothesis is that this will simplify things for the model. For
example, on Windows, instead of generating:
```json
{"command": ["pwsh.exe", "-NoLogo", "-Command", "ls -Name"]}
```
The model could simply generate:
```json
{"command": "ls -Name"}
```
As part of this change, I extracted some logic out of `user_shell.rs` as
`Shell::derive_exec_args()` so that it can be reused in
`codex-rs/core/src/tools/handlers/shell.rs`. Note the original code
generated exec arg lists like:
```javascript
["bash", "-lc", command]
["zsh", "-lc", command]
["pwsh.exe", "-NoProfile", "-Command", command]
```
Using `-l` for Bash and Zsh, but then specifying `-NoProfile` for
PowerShell seemed inconsistent to me, so I changed this in the new
implementation while also adding a `use_login_shell: bool` option to
make this explicit. If we decide to add a `login: bool` to
`ShellCommandToolCallParams` like we have for unified exec:
807e2c27f0/codex-rs/core/src/tools/handlers/unified_exec.rs (L33-L34)
Then this should make it straightforward to support.
This commit is contained in:
@@ -292,7 +292,7 @@ impl From<Vec<UserInput>> for ResponseInputItem {
|
||||
}
|
||||
|
||||
/// If the `name` of a `ResponseItem::FunctionCall` is either `container.exec`
|
||||
/// or shell`, the `arguments` field should deserialize to this struct.
|
||||
/// or `shell`, the `arguments` field should deserialize to this struct.
|
||||
#[derive(Deserialize, Debug, Clone, PartialEq, JsonSchema, TS)]
|
||||
pub struct ShellToolCallParams {
|
||||
pub command: Vec<String>,
|
||||
@@ -307,6 +307,22 @@ pub struct ShellToolCallParams {
|
||||
pub justification: Option<String>,
|
||||
}
|
||||
|
||||
/// If the `name` of a `ResponseItem::FunctionCall` is `shell_command`, the
|
||||
/// `arguments` field should deserialize to this struct.
|
||||
#[derive(Deserialize, Debug, Clone, PartialEq, JsonSchema, TS)]
|
||||
pub struct ShellCommandToolCallParams {
|
||||
pub command: String,
|
||||
pub workdir: Option<String>,
|
||||
|
||||
/// This is the maximum time in milliseconds that the command is allowed to run.
|
||||
#[serde(alias = "timeout")]
|
||||
pub timeout_ms: Option<u64>,
|
||||
#[serde(skip_serializing_if = "Option::is_none")]
|
||||
pub with_escalated_permissions: Option<bool>,
|
||||
#[serde(skip_serializing_if = "Option::is_none")]
|
||||
pub justification: Option<String>,
|
||||
}
|
||||
|
||||
/// Responses API compatible content items that can be returned by a tool call.
|
||||
/// This is a subset of ContentItem with the types we support as function call outputs.
|
||||
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, JsonSchema, TS)]
|
||||
|
||||
Reference in New Issue
Block a user