Files
codex/codex-rs/command/src/command_safety/is_dangerous_command.rs
Michael Bolin d8f9bb65e2 # Split command parsing/safety out of codex-core into new codex-command (#11361)
`codex-core` had accumulated command parsing and command safety logic
(`bash`, `powershell`, `parse_command`, and `command_safety`) that is
logically cohesive but orthogonal to most core session/runtime logic.
Keeping this code in `codex-core` made the crate increasingly monolithic
and raised iteration cost for unrelated core changes.

This change extracts that surface into a dedicated crate,
`codex-command`, while preserving existing `codex_core::...` call sites
via re-exports.

## Why this refactor

During analysis, command parsing/safety stood out as a good first split
because it has:

- a clear domain boundary (shell parsing + safety classification)
- relatively self-contained dependencies (notably `tree-sitter` /
`tree-sitter-bash`)
- a meaningful standalone test surface (`134` tests moved with the
crate)
- many downstream uses that benefit from independent compilation and
caching

The practical problem was build latency from a large `codex-core`
compile/test graph. Clean-build timings before and after this split
showed measurable wins:

- `cargo check -p codex-core`: `57.08s` -> `53.54s` (~`6.2%` faster)
- `cargo test -p codex-core --no-run`: `2m39.9s` -> `2m20s` (~`12.4%`
faster)
- `codex-core lib` compile unit: `57.18s` -> `49.67s` (~`13.1%` faster)
- `codex-core lib(test)` compile unit: `60.87s` -> `53.21s` (~`12.6%`
faster)

This gives a concrete reduction in core build overhead without changing
behavior.

## What changed

### New crate

- Added `codex-rs/command` as workspace crate `codex-command`.
- Added:
  - `command/src/lib.rs`
  - `command/src/bash.rs`
  - `command/src/powershell.rs`
  - `command/src/parse_command.rs`
  - `command/src/command_safety/*`
  - `command/src/shell_detect.rs`
  - `command/BUILD.bazel`

### Code moved out of `codex-core`

- Moved modules from `core/src` into `command/src`:
  - `bash.rs`
  - `powershell.rs`
  - `parse_command.rs`
  - `command_safety/*`

### Dependency graph updates

- Added workspace member/dependency entries for `codex-command` in
`codex-rs/Cargo.toml`.
- Added `codex-command` dependency to `codex-rs/core/Cargo.toml`.
- Removed `tree-sitter` and `tree-sitter-bash` from `codex-core` direct
deps (now owned by `codex-command`).

### API compatibility for callers

To avoid immediate downstream churn, `codex-core` now re-exports the
moved modules/functions:

- `codex_command::bash`
- `codex_command::powershell`
- `codex_command::parse_command`
- `codex_command::is_safe_command`
- `codex_command::is_dangerous_command`

This keeps existing `codex_core::...` paths working while enabling
gradual migration to direct `codex-command` usage.

### Internal decoupling detail

- Added `command::shell_detect` so moved `bash`/`powershell` logic no
longer depends on core shell internals.
- Adjusted PowerShell helper visibility in `codex-command` for existing
core test usage (`UTF8` prefix helper + executable discovery functions).

## Validation

- `just fmt`
- `just fix -p codex-command -p codex-core`
- `cargo test -p codex-command` (`134` passed)
- `cargo test -p codex-core --no-run`
- `cargo test -p codex-core shell_command_handler`

## Notes / follow-up

This commit intentionally prioritizes boundary extraction and
compatibility. A follow-up can migrate downstream crates to depend
directly on `codex-command` (instead of through `codex-core` re-exports)
to realize additional incremental build wins.
2026-02-10 14:43:16 -08:00

383 lines
11 KiB
Rust

use crate::bash::parse_shell_lc_plain_commands;
#[cfg(windows)]
#[path = "windows_dangerous_commands.rs"]
mod windows_dangerous_commands;
pub fn command_might_be_dangerous(command: &[String]) -> bool {
#[cfg(windows)]
{
if windows_dangerous_commands::is_dangerous_command_windows(command) {
return true;
}
}
if is_dangerous_to_call_with_exec(command) {
return true;
}
// Support `bash -lc "<script>"` where the any part of the script might contain a dangerous command.
if let Some(all_commands) = parse_shell_lc_plain_commands(command)
&& all_commands
.iter()
.any(|cmd| is_dangerous_to_call_with_exec(cmd))
{
return true;
}
false
}
fn is_git_global_option_with_value(arg: &str) -> bool {
matches!(
arg,
"-C" | "-c"
| "--config-env"
| "--exec-path"
| "--git-dir"
| "--namespace"
| "--super-prefix"
| "--work-tree"
)
}
fn is_git_global_option_with_inline_value(arg: &str) -> bool {
matches!(
arg,
s if s.starts_with("--config-env=")
|| s.starts_with("--exec-path=")
|| s.starts_with("--git-dir=")
|| s.starts_with("--namespace=")
|| s.starts_with("--super-prefix=")
|| s.starts_with("--work-tree=")
) || ((arg.starts_with("-C") || arg.starts_with("-c")) && arg.len() > 2)
}
/// Find the first matching git subcommand, skipping known global options that
/// may appear before it (e.g., `-C`, `-c`, `--git-dir`).
///
/// Shared with `is_safe_command` to avoid git-global-option bypasses.
pub(crate) fn find_git_subcommand<'a>(
command: &'a [String],
subcommands: &[&str],
) -> Option<(usize, &'a str)> {
let cmd0 = command.first().map(String::as_str)?;
if !cmd0.ends_with("git") {
return None;
}
let mut skip_next = false;
for (idx, arg) in command.iter().enumerate().skip(1) {
if skip_next {
skip_next = false;
continue;
}
let arg = arg.as_str();
if is_git_global_option_with_inline_value(arg) {
continue;
}
if is_git_global_option_with_value(arg) {
skip_next = true;
continue;
}
if arg == "--" || arg.starts_with('-') {
continue;
}
if subcommands.contains(&arg) {
return Some((idx, arg));
}
// In git, the first non-option token is the subcommand. If it isn't
// one of the subcommands we're looking for, we must stop scanning to
// avoid misclassifying later positional args (e.g., branch names).
return None;
}
None
}
fn is_dangerous_to_call_with_exec(command: &[String]) -> bool {
let cmd0 = command.first().map(String::as_str);
match cmd0 {
Some(cmd) if cmd.ends_with("git") => {
let Some((subcommand_idx, subcommand)) =
find_git_subcommand(command, &["reset", "rm", "branch", "push", "clean"])
else {
return false;
};
match subcommand {
"reset" | "rm" => true,
"branch" => git_branch_is_delete(&command[subcommand_idx + 1..]),
"push" => git_push_is_dangerous(&command[subcommand_idx + 1..]),
"clean" => git_clean_is_force(&command[subcommand_idx + 1..]),
other => {
debug_assert!(false, "unexpected git subcommand from matcher: {other}");
false
}
}
}
Some("rm") => matches!(command.get(1).map(String::as_str), Some("-f" | "-rf")),
// for sudo <cmd> simply do the check for <cmd>
Some("sudo") => is_dangerous_to_call_with_exec(&command[1..]),
// ── anything else ─────────────────────────────────────────────────
_ => false,
}
}
fn git_branch_is_delete(branch_args: &[String]) -> bool {
// Git allows stacking short flags (for example, `-dv` or `-vd`). Treat any
// short-flag group containing `d`/`D` as a delete flag.
branch_args.iter().map(String::as_str).any(|arg| {
matches!(arg, "-d" | "-D" | "--delete")
|| arg.starts_with("--delete=")
|| short_flag_group_contains(arg, 'd')
|| short_flag_group_contains(arg, 'D')
})
}
fn short_flag_group_contains(arg: &str, target: char) -> bool {
arg.starts_with('-') && !arg.starts_with("--") && arg.chars().skip(1).any(|c| c == target)
}
fn git_push_is_dangerous(push_args: &[String]) -> bool {
push_args.iter().map(String::as_str).any(|arg| {
matches!(
arg,
"--force" | "--force-with-lease" | "--force-if-includes" | "--delete" | "-f" | "-d"
) || arg.starts_with("--force-with-lease=")
|| arg.starts_with("--force-if-includes=")
|| arg.starts_with("--delete=")
|| short_flag_group_contains(arg, 'f')
|| short_flag_group_contains(arg, 'd')
|| git_push_refspec_is_dangerous(arg)
})
}
fn git_push_refspec_is_dangerous(arg: &str) -> bool {
// `+<refspec>` forces updates and `:<dst>` deletes remote refs.
(arg.starts_with('+') || arg.starts_with(':')) && arg.len() > 1
}
fn git_clean_is_force(clean_args: &[String]) -> bool {
clean_args.iter().map(String::as_str).any(|arg| {
matches!(arg, "--force" | "-f")
|| arg.starts_with("--force=")
|| short_flag_group_contains(arg, 'f')
})
}
#[cfg(test)]
mod tests {
use super::*;
fn vec_str(items: &[&str]) -> Vec<String> {
items.iter().map(std::string::ToString::to_string).collect()
}
#[test]
fn git_reset_is_dangerous() {
assert!(command_might_be_dangerous(&vec_str(&["git", "reset"])));
}
#[test]
fn bash_git_reset_is_dangerous() {
assert!(command_might_be_dangerous(&vec_str(&[
"bash",
"-lc",
"git reset --hard",
])));
}
#[test]
fn zsh_git_reset_is_dangerous() {
assert!(command_might_be_dangerous(&vec_str(&[
"zsh",
"-lc",
"git reset --hard",
])));
}
#[test]
fn git_status_is_not_dangerous() {
assert!(!command_might_be_dangerous(&vec_str(&["git", "status"])));
}
#[test]
fn bash_git_status_is_not_dangerous() {
assert!(!command_might_be_dangerous(&vec_str(&[
"bash",
"-lc",
"git status",
])));
}
#[test]
fn sudo_git_reset_is_dangerous() {
assert!(command_might_be_dangerous(&vec_str(&[
"sudo", "git", "reset", "--hard",
])));
}
#[test]
fn usr_bin_git_is_dangerous() {
assert!(command_might_be_dangerous(&vec_str(&[
"/usr/bin/git",
"reset",
"--hard",
])));
}
#[test]
fn git_branch_delete_is_dangerous() {
assert!(command_might_be_dangerous(&vec_str(&[
"git", "branch", "-d", "feature",
])));
assert!(command_might_be_dangerous(&vec_str(&[
"git", "branch", "-D", "feature",
])));
assert!(command_might_be_dangerous(&vec_str(&[
"bash",
"-lc",
"git branch --delete feature",
])));
}
#[test]
fn git_branch_delete_with_stacked_short_flags_is_dangerous() {
assert!(command_might_be_dangerous(&vec_str(&[
"git", "branch", "-dv", "feature",
])));
assert!(command_might_be_dangerous(&vec_str(&[
"git", "branch", "-vd", "feature",
])));
assert!(command_might_be_dangerous(&vec_str(&[
"git", "branch", "-vD", "feature",
])));
assert!(command_might_be_dangerous(&vec_str(&[
"git", "branch", "-Dvv", "feature",
])));
}
#[test]
fn git_branch_delete_with_global_options_is_dangerous() {
assert!(command_might_be_dangerous(&vec_str(&[
"git", "-C", ".", "branch", "-d", "feature",
])));
assert!(command_might_be_dangerous(&vec_str(&[
"git",
"-c",
"color.ui=false",
"branch",
"-D",
"feature",
])));
assert!(command_might_be_dangerous(&vec_str(&[
"bash",
"-lc",
"git -C . branch -d feature",
])));
}
#[test]
fn git_checkout_reset_is_not_dangerous() {
// The first non-option token is "checkout", so later positional args
// like branch names must not be treated as subcommands.
assert!(!command_might_be_dangerous(&vec_str(&[
"git", "checkout", "reset",
])));
}
#[test]
fn git_push_force_is_dangerous() {
assert!(command_might_be_dangerous(&vec_str(&[
"git", "push", "--force", "origin", "main",
])));
assert!(command_might_be_dangerous(&vec_str(&[
"git", "push", "-f", "origin", "main",
])));
assert!(command_might_be_dangerous(&vec_str(&[
"git",
"-C",
".",
"push",
"--force-with-lease",
"origin",
"main",
])));
}
#[test]
fn git_push_plus_refspec_is_dangerous() {
assert!(command_might_be_dangerous(&vec_str(&[
"git", "push", "origin", "+main",
])));
assert!(command_might_be_dangerous(&vec_str(&[
"git",
"push",
"origin",
"+refs/heads/main:refs/heads/main",
])));
}
#[test]
fn git_push_delete_flag_is_dangerous() {
assert!(command_might_be_dangerous(&vec_str(&[
"git", "push", "--delete", "origin", "feature",
])));
assert!(command_might_be_dangerous(&vec_str(&[
"git", "push", "-d", "origin", "feature",
])));
}
#[test]
fn git_push_delete_refspec_is_dangerous() {
assert!(command_might_be_dangerous(&vec_str(&[
"git", "push", "origin", ":feature",
])));
assert!(command_might_be_dangerous(&vec_str(&[
"bash",
"-lc",
"git push origin :feature",
])));
}
#[test]
fn git_push_without_force_is_not_dangerous() {
assert!(!command_might_be_dangerous(&vec_str(&[
"git", "push", "origin", "main",
])));
}
#[test]
fn git_clean_force_is_dangerous_even_when_f_is_not_first_flag() {
assert!(command_might_be_dangerous(&vec_str(&[
"git", "clean", "-fdx",
])));
assert!(command_might_be_dangerous(&vec_str(&[
"git", "clean", "-xdf",
])));
assert!(command_might_be_dangerous(&vec_str(&[
"git", "clean", "--force",
])));
}
#[test]
fn rm_rf_is_dangerous() {
assert!(command_might_be_dangerous(&vec_str(&["rm", "-rf", "/"])));
}
#[test]
fn rm_f_is_dangerous() {
assert!(command_might_be_dangerous(&vec_str(&["rm", "-f", "/"])));
}
}