chore: rename codex-command to codex-shell-command (#11378)

This addresses some post-merge feedback on
https://github.com/openai/codex/pull/11361:

- crate rename
- reuse `detect_shell_type()` utility
This commit is contained in:
Michael Bolin
2026-02-10 17:03:46 -08:00
committed by GitHub
parent 87bbfc50a1
commit d44f4205fb
19 changed files with 58 additions and 53 deletions

View File

@@ -0,0 +1,565 @@
use std::path::PathBuf;
use tree_sitter::Node;
use tree_sitter::Parser;
use tree_sitter::Tree;
use tree_sitter_bash::LANGUAGE as BASH;
use crate::shell_detect::ShellType;
use crate::shell_detect::detect_shell_type;
/// Parse the provided bash source using tree-sitter-bash, returning a Tree on
/// success or None if parsing failed.
pub fn try_parse_shell(shell_lc_arg: &str) -> Option<Tree> {
let lang = BASH.into();
let mut parser = Parser::new();
#[expect(clippy::expect_used)]
parser.set_language(&lang).expect("load bash grammar");
let old_tree: Option<&Tree> = None;
parser.parse(shell_lc_arg, old_tree)
}
/// Parse a script which may contain multiple simple commands joined only by
/// the safe logical/pipe/sequencing operators: `&&`, `||`, `;`, `|`.
///
/// Returns `Some(Vec<command_words>)` if every command is a plain wordonly
/// command and the parse tree does not contain disallowed constructs
/// (parentheses, redirections, substitutions, control flow, etc.). Otherwise
/// returns `None`.
pub fn try_parse_word_only_commands_sequence(tree: &Tree, src: &str) -> Option<Vec<Vec<String>>> {
if tree.root_node().has_error() {
return None;
}
// List of allowed (named) node kinds for a "word only commands sequence".
// If we encounter a named node that is not in this list we reject.
const ALLOWED_KINDS: &[&str] = &[
// top level containers
"program",
"list",
"pipeline",
// commands & words
"command",
"command_name",
"word",
"string",
"string_content",
"raw_string",
"number",
"concatenation",
];
// Allow only safe punctuation / operator tokens; anything else causes reject.
const ALLOWED_PUNCT_TOKENS: &[&str] = &["&&", "||", ";", "|", "\"", "'"];
let root = tree.root_node();
let mut cursor = root.walk();
let mut stack = vec![root];
let mut command_nodes = Vec::new();
while let Some(node) = stack.pop() {
let kind = node.kind();
if node.is_named() {
if !ALLOWED_KINDS.contains(&kind) {
return None;
}
if kind == "command" {
command_nodes.push(node);
}
} else {
// Reject any punctuation / operator tokens that are not explicitly allowed.
if kind.chars().any(|c| "&;|".contains(c)) && !ALLOWED_PUNCT_TOKENS.contains(&kind) {
return None;
}
if !(ALLOWED_PUNCT_TOKENS.contains(&kind) || kind.trim().is_empty()) {
// If it's a quote token or operator it's allowed above; we also allow whitespace tokens.
// Any other punctuation like parentheses, braces, redirects, backticks, etc are rejected.
return None;
}
}
for child in node.children(&mut cursor) {
stack.push(child);
}
}
// Walk uses a stack (LIFO), so re-sort by position to restore source order.
command_nodes.sort_by_key(Node::start_byte);
let mut commands = Vec::new();
for node in command_nodes {
if let Some(words) = parse_plain_command_from_node(node, src) {
commands.push(words);
} else {
return None;
}
}
Some(commands)
}
pub fn extract_bash_command(command: &[String]) -> Option<(&str, &str)> {
let [shell, flag, script] = command else {
return None;
};
if !matches!(flag.as_str(), "-lc" | "-c")
|| !matches!(
detect_shell_type(&PathBuf::from(shell)),
Some(ShellType::Zsh) | Some(ShellType::Bash) | Some(ShellType::Sh)
)
{
return None;
}
Some((shell, script))
}
/// Returns the sequence of plain commands within a `bash -lc "..."` or
/// `zsh -lc "..."` invocation when the script only contains word-only commands
/// joined by safe operators.
pub fn parse_shell_lc_plain_commands(command: &[String]) -> Option<Vec<Vec<String>>> {
let (_, script) = extract_bash_command(command)?;
let tree = try_parse_shell(script)?;
try_parse_word_only_commands_sequence(&tree, script)
}
/// Returns the parsed argv for a single shell command in a here-doc style
/// script (`<<`), as long as the script contains exactly one command node.
pub fn parse_shell_lc_single_command_prefix(command: &[String]) -> Option<Vec<String>> {
let (_, script) = extract_bash_command(command)?;
let tree = try_parse_shell(script)?;
let root = tree.root_node();
if root.has_error() {
return None;
}
if !has_named_descendant_kind(root, "heredoc_redirect") {
return None;
}
let command_node = find_single_command_node(root)?;
parse_heredoc_command_words(command_node, script)
}
fn parse_plain_command_from_node(cmd: tree_sitter::Node, src: &str) -> Option<Vec<String>> {
if cmd.kind() != "command" {
return None;
}
let mut words = Vec::new();
let mut cursor = cmd.walk();
for child in cmd.named_children(&mut cursor) {
match child.kind() {
"command_name" => {
let word_node = child.named_child(0)?;
if word_node.kind() != "word" {
return None;
}
words.push(word_node.utf8_text(src.as_bytes()).ok()?.to_owned());
}
"word" | "number" => {
words.push(child.utf8_text(src.as_bytes()).ok()?.to_owned());
}
"string" => {
let parsed = parse_double_quoted_string(child, src)?;
words.push(parsed);
}
"raw_string" => {
let parsed = parse_raw_string(child, src)?;
words.push(parsed);
}
"concatenation" => {
// Handle concatenated arguments like -g"*.py"
let mut concatenated = String::new();
let mut concat_cursor = child.walk();
for part in child.named_children(&mut concat_cursor) {
match part.kind() {
"word" | "number" => {
concatenated
.push_str(part.utf8_text(src.as_bytes()).ok()?.to_owned().as_str());
}
"string" => {
let parsed = parse_double_quoted_string(part, src)?;
concatenated.push_str(&parsed);
}
"raw_string" => {
let parsed = parse_raw_string(part, src)?;
concatenated.push_str(&parsed);
}
_ => return None,
}
}
if concatenated.is_empty() {
return None;
}
words.push(concatenated);
}
_ => return None,
}
}
Some(words)
}
fn parse_heredoc_command_words(cmd: Node<'_>, src: &str) -> Option<Vec<String>> {
if cmd.kind() != "command" {
return None;
}
let mut words = Vec::new();
let mut cursor = cmd.walk();
for child in cmd.named_children(&mut cursor) {
match child.kind() {
"command_name" => {
let word_node = child.named_child(0)?;
if !matches!(word_node.kind(), "word" | "number")
|| !is_literal_word_or_number(word_node)
{
return None;
}
words.push(word_node.utf8_text(src.as_bytes()).ok()?.to_owned());
}
"word" | "number" => {
if !is_literal_word_or_number(child) {
return None;
}
words.push(child.utf8_text(src.as_bytes()).ok()?.to_owned());
}
// Allow shell constructs that attach IO to a single command without
// changing argv matching semantics for the executable prefix.
"variable_assignment" | "comment" => {}
kind if is_allowed_heredoc_attachment_kind(kind) => {}
_ => return None,
}
}
if words.is_empty() { None } else { Some(words) }
}
fn is_literal_word_or_number(node: Node<'_>) -> bool {
if !matches!(node.kind(), "word" | "number") {
return false;
}
let mut cursor = node.walk();
node.named_children(&mut cursor).next().is_none()
}
fn has_named_descendant_kind(node: Node<'_>, kind: &str) -> bool {
let mut stack = vec![node];
while let Some(current) = stack.pop() {
if current.kind() == kind {
return true;
}
let mut cursor = current.walk();
for child in current.named_children(&mut cursor) {
stack.push(child);
}
}
false
}
fn is_allowed_heredoc_attachment_kind(kind: &str) -> bool {
matches!(
kind,
"heredoc_body"
| "simple_heredoc_body"
| "heredoc_redirect"
| "herestring_redirect"
| "file_redirect"
| "redirected_statement"
)
}
fn find_single_command_node(root: Node<'_>) -> Option<Node<'_>> {
let mut stack = vec![root];
let mut single_command = None;
while let Some(node) = stack.pop() {
if node.kind() == "command" {
if single_command.is_some() {
return None;
}
single_command = Some(node);
}
let mut cursor = node.walk();
for child in node.named_children(&mut cursor) {
stack.push(child);
}
}
single_command
}
fn parse_double_quoted_string(node: Node, src: &str) -> Option<String> {
if node.kind() != "string" {
return None;
}
let mut cursor = node.walk();
for part in node.named_children(&mut cursor) {
if part.kind() != "string_content" {
return None;
}
}
let raw = node.utf8_text(src.as_bytes()).ok()?;
let stripped = raw
.strip_prefix('"')
.and_then(|text| text.strip_suffix('"'))?;
Some(stripped.to_string())
}
fn parse_raw_string(node: Node, src: &str) -> Option<String> {
if node.kind() != "raw_string" {
return None;
}
let raw_string = node.utf8_text(src.as_bytes()).ok()?;
let stripped = raw_string
.strip_prefix('\'')
.and_then(|s| s.strip_suffix('\''));
stripped.map(str::to_owned)
}
#[cfg(test)]
mod tests {
use super::*;
use pretty_assertions::assert_eq;
fn parse_seq(src: &str) -> Option<Vec<Vec<String>>> {
let tree = try_parse_shell(src)?;
try_parse_word_only_commands_sequence(&tree, src)
}
#[test]
fn accepts_single_simple_command() {
let cmds = parse_seq("ls -1").unwrap();
assert_eq!(cmds, vec![vec!["ls".to_string(), "-1".to_string()]]);
}
#[test]
fn accepts_multiple_commands_with_allowed_operators() {
let src = "ls && pwd; echo 'hi there' | wc -l";
let cmds = parse_seq(src).unwrap();
let expected: Vec<Vec<String>> = vec![
vec!["ls".to_string()],
vec!["pwd".to_string()],
vec!["echo".to_string(), "hi there".to_string()],
vec!["wc".to_string(), "-l".to_string()],
];
assert_eq!(cmds, expected);
}
#[test]
fn extracts_double_and_single_quoted_strings() {
let cmds = parse_seq("echo \"hello world\"").unwrap();
assert_eq!(
cmds,
vec![vec!["echo".to_string(), "hello world".to_string()]]
);
let cmds2 = parse_seq("echo 'hi there'").unwrap();
assert_eq!(
cmds2,
vec![vec!["echo".to_string(), "hi there".to_string()]]
);
}
#[test]
fn accepts_double_quoted_strings_with_newlines() {
let cmds = parse_seq("git commit -m \"line1\nline2\"").unwrap();
assert_eq!(
cmds,
vec![vec![
"git".to_string(),
"commit".to_string(),
"-m".to_string(),
"line1\nline2".to_string(),
]]
);
}
#[test]
fn accepts_mixed_quote_concatenation() {
assert_eq!(
parse_seq(r#"echo "/usr"'/'"local"/bin"#).unwrap(),
vec![vec!["echo".to_string(), "/usr/local/bin".to_string()]]
);
assert_eq!(
parse_seq(r#"echo '/usr'"/"'local'/bin"#).unwrap(),
vec![vec!["echo".to_string(), "/usr/local/bin".to_string()]]
);
}
#[test]
fn rejects_double_quoted_strings_with_expansions() {
assert!(parse_seq(r#"echo "hi ${USER}""#).is_none());
assert!(parse_seq(r#"echo "$HOME""#).is_none());
}
#[test]
fn accepts_numbers_as_words() {
let cmds = parse_seq("echo 123 456").unwrap();
assert_eq!(
cmds,
vec![vec![
"echo".to_string(),
"123".to_string(),
"456".to_string()
]]
);
}
#[test]
fn rejects_parentheses_and_subshells() {
assert!(parse_seq("(ls)").is_none());
assert!(parse_seq("ls || (pwd && echo hi)").is_none());
}
#[test]
fn rejects_redirections_and_unsupported_operators() {
assert!(parse_seq("ls > out.txt").is_none());
assert!(parse_seq("echo hi & echo bye").is_none());
}
#[test]
fn rejects_command_and_process_substitutions_and_expansions() {
assert!(parse_seq("echo $(pwd)").is_none());
assert!(parse_seq("echo `pwd`").is_none());
assert!(parse_seq("echo $HOME").is_none());
assert!(parse_seq("echo \"hi $USER\"").is_none());
}
#[test]
fn rejects_variable_assignment_prefix() {
assert!(parse_seq("FOO=bar ls").is_none());
}
#[test]
fn rejects_trailing_operator_parse_error() {
assert!(parse_seq("ls &&").is_none());
}
#[test]
fn parse_zsh_lc_plain_commands() {
let command = vec!["zsh".to_string(), "-lc".to_string(), "ls".to_string()];
let parsed = parse_shell_lc_plain_commands(&command).unwrap();
assert_eq!(parsed, vec![vec!["ls".to_string()]]);
}
#[test]
fn accepts_concatenated_flag_and_value() {
// Test case: -g"*.py" (flag directly concatenated with quoted value)
let cmds = parse_seq("rg -n \"foo\" -g\"*.py\"").unwrap();
assert_eq!(
cmds,
vec![vec![
"rg".to_string(),
"-n".to_string(),
"foo".to_string(),
"-g*.py".to_string(),
]]
);
}
#[test]
fn accepts_concatenated_flag_with_single_quotes() {
let cmds = parse_seq("grep -n 'pattern' -g'*.txt'").unwrap();
assert_eq!(
cmds,
vec![vec![
"grep".to_string(),
"-n".to_string(),
"pattern".to_string(),
"-g*.txt".to_string(),
]]
);
}
#[test]
fn rejects_concatenation_with_variable_substitution() {
// Environment variables in concatenated strings should be rejected
assert!(parse_seq("rg -g\"$VAR\" pattern").is_none());
assert!(parse_seq("rg -g\"${VAR}\" pattern").is_none());
}
#[test]
fn rejects_concatenation_with_command_substitution() {
// Command substitution in concatenated strings should be rejected
assert!(parse_seq("rg -g\"$(pwd)\" pattern").is_none());
assert!(parse_seq("rg -g\"$(echo '*.py')\" pattern").is_none());
}
#[test]
fn parse_shell_lc_single_command_prefix_supports_heredoc() {
let command = vec![
"zsh".to_string(),
"-lc".to_string(),
"python3 <<'PY'\nprint('hello')\nPY".to_string(),
];
let parsed = parse_shell_lc_single_command_prefix(&command);
assert_eq!(parsed, Some(vec!["python3".to_string()]));
let command_unquoted = vec![
"zsh".to_string(),
"-lc".to_string(),
"python3 << PY\nprint('hello')\nPY".to_string(),
];
let parsed_unquoted = parse_shell_lc_single_command_prefix(&command_unquoted);
assert_eq!(parsed_unquoted, Some(vec!["python3".to_string()]));
}
#[test]
fn parse_shell_lc_single_command_prefix_rejects_multi_command_scripts() {
let command = vec![
"bash".to_string(),
"-lc".to_string(),
"python3 <<'PY'\nprint('hello')\nPY\necho done".to_string(),
];
assert_eq!(parse_shell_lc_single_command_prefix(&command), None);
}
#[test]
fn parse_shell_lc_single_command_prefix_rejects_non_heredoc_redirects() {
let command = vec![
"bash".to_string(),
"-lc".to_string(),
"echo hello > /tmp/out.txt".to_string(),
];
assert_eq!(parse_shell_lc_single_command_prefix(&command), None);
}
#[test]
fn parse_shell_lc_single_command_prefix_accepts_heredoc_with_extra_redirect() {
let command = vec![
"bash".to_string(),
"-lc".to_string(),
"python3 <<'PY' > /tmp/out.txt\nprint('hello')\nPY".to_string(),
];
assert_eq!(
parse_shell_lc_single_command_prefix(&command),
Some(vec!["python3".to_string()])
);
}
#[test]
fn parse_shell_lc_single_command_prefix_rejects_herestring_with_substitution() {
let command = vec![
"bash".to_string(),
"-lc".to_string(),
r#"python3 <<< "$(rm -rf /)""#.to_string(),
];
assert_eq!(parse_shell_lc_single_command_prefix(&command), None);
}
#[test]
fn parse_shell_lc_single_command_prefix_rejects_arithmetic_shift_non_heredoc_script() {
let command = vec![
"bash".to_string(),
"-lc".to_string(),
"echo $((1<<2))".to_string(),
];
assert_eq!(parse_shell_lc_single_command_prefix(&command), None);
}
#[test]
fn parse_shell_lc_single_command_prefix_rejects_heredoc_command_with_word_expansion() {
let command = vec![
"bash".to_string(),
"-lc".to_string(),
"python3 $((1<<2)) <<'PY'\nprint('hello')\nPY".to_string(),
];
assert_eq!(parse_shell_lc_single_command_prefix(&command), None);
}
}

View File

@@ -0,0 +1,382 @@
use crate::bash::parse_shell_lc_plain_commands;
#[cfg(windows)]
#[path = "windows_dangerous_commands.rs"]
mod windows_dangerous_commands;
pub fn command_might_be_dangerous(command: &[String]) -> bool {
#[cfg(windows)]
{
if windows_dangerous_commands::is_dangerous_command_windows(command) {
return true;
}
}
if is_dangerous_to_call_with_exec(command) {
return true;
}
// Support `bash -lc "<script>"` where the any part of the script might contain a dangerous command.
if let Some(all_commands) = parse_shell_lc_plain_commands(command)
&& all_commands
.iter()
.any(|cmd| is_dangerous_to_call_with_exec(cmd))
{
return true;
}
false
}
fn is_git_global_option_with_value(arg: &str) -> bool {
matches!(
arg,
"-C" | "-c"
| "--config-env"
| "--exec-path"
| "--git-dir"
| "--namespace"
| "--super-prefix"
| "--work-tree"
)
}
fn is_git_global_option_with_inline_value(arg: &str) -> bool {
matches!(
arg,
s if s.starts_with("--config-env=")
|| s.starts_with("--exec-path=")
|| s.starts_with("--git-dir=")
|| s.starts_with("--namespace=")
|| s.starts_with("--super-prefix=")
|| s.starts_with("--work-tree=")
) || ((arg.starts_with("-C") || arg.starts_with("-c")) && arg.len() > 2)
}
/// Find the first matching git subcommand, skipping known global options that
/// may appear before it (e.g., `-C`, `-c`, `--git-dir`).
///
/// Shared with `is_safe_command` to avoid git-global-option bypasses.
pub(crate) fn find_git_subcommand<'a>(
command: &'a [String],
subcommands: &[&str],
) -> Option<(usize, &'a str)> {
let cmd0 = command.first().map(String::as_str)?;
if !cmd0.ends_with("git") {
return None;
}
let mut skip_next = false;
for (idx, arg) in command.iter().enumerate().skip(1) {
if skip_next {
skip_next = false;
continue;
}
let arg = arg.as_str();
if is_git_global_option_with_inline_value(arg) {
continue;
}
if is_git_global_option_with_value(arg) {
skip_next = true;
continue;
}
if arg == "--" || arg.starts_with('-') {
continue;
}
if subcommands.contains(&arg) {
return Some((idx, arg));
}
// In git, the first non-option token is the subcommand. If it isn't
// one of the subcommands we're looking for, we must stop scanning to
// avoid misclassifying later positional args (e.g., branch names).
return None;
}
None
}
fn is_dangerous_to_call_with_exec(command: &[String]) -> bool {
let cmd0 = command.first().map(String::as_str);
match cmd0 {
Some(cmd) if cmd.ends_with("git") => {
let Some((subcommand_idx, subcommand)) =
find_git_subcommand(command, &["reset", "rm", "branch", "push", "clean"])
else {
return false;
};
match subcommand {
"reset" | "rm" => true,
"branch" => git_branch_is_delete(&command[subcommand_idx + 1..]),
"push" => git_push_is_dangerous(&command[subcommand_idx + 1..]),
"clean" => git_clean_is_force(&command[subcommand_idx + 1..]),
other => {
debug_assert!(false, "unexpected git subcommand from matcher: {other}");
false
}
}
}
Some("rm") => matches!(command.get(1).map(String::as_str), Some("-f" | "-rf")),
// for sudo <cmd> simply do the check for <cmd>
Some("sudo") => is_dangerous_to_call_with_exec(&command[1..]),
// ── anything else ─────────────────────────────────────────────────
_ => false,
}
}
fn git_branch_is_delete(branch_args: &[String]) -> bool {
// Git allows stacking short flags (for example, `-dv` or `-vd`). Treat any
// short-flag group containing `d`/`D` as a delete flag.
branch_args.iter().map(String::as_str).any(|arg| {
matches!(arg, "-d" | "-D" | "--delete")
|| arg.starts_with("--delete=")
|| short_flag_group_contains(arg, 'd')
|| short_flag_group_contains(arg, 'D')
})
}
fn short_flag_group_contains(arg: &str, target: char) -> bool {
arg.starts_with('-') && !arg.starts_with("--") && arg.chars().skip(1).any(|c| c == target)
}
fn git_push_is_dangerous(push_args: &[String]) -> bool {
push_args.iter().map(String::as_str).any(|arg| {
matches!(
arg,
"--force" | "--force-with-lease" | "--force-if-includes" | "--delete" | "-f" | "-d"
) || arg.starts_with("--force-with-lease=")
|| arg.starts_with("--force-if-includes=")
|| arg.starts_with("--delete=")
|| short_flag_group_contains(arg, 'f')
|| short_flag_group_contains(arg, 'd')
|| git_push_refspec_is_dangerous(arg)
})
}
fn git_push_refspec_is_dangerous(arg: &str) -> bool {
// `+<refspec>` forces updates and `:<dst>` deletes remote refs.
(arg.starts_with('+') || arg.starts_with(':')) && arg.len() > 1
}
fn git_clean_is_force(clean_args: &[String]) -> bool {
clean_args.iter().map(String::as_str).any(|arg| {
matches!(arg, "--force" | "-f")
|| arg.starts_with("--force=")
|| short_flag_group_contains(arg, 'f')
})
}
#[cfg(test)]
mod tests {
use super::*;
fn vec_str(items: &[&str]) -> Vec<String> {
items.iter().map(std::string::ToString::to_string).collect()
}
#[test]
fn git_reset_is_dangerous() {
assert!(command_might_be_dangerous(&vec_str(&["git", "reset"])));
}
#[test]
fn bash_git_reset_is_dangerous() {
assert!(command_might_be_dangerous(&vec_str(&[
"bash",
"-lc",
"git reset --hard",
])));
}
#[test]
fn zsh_git_reset_is_dangerous() {
assert!(command_might_be_dangerous(&vec_str(&[
"zsh",
"-lc",
"git reset --hard",
])));
}
#[test]
fn git_status_is_not_dangerous() {
assert!(!command_might_be_dangerous(&vec_str(&["git", "status"])));
}
#[test]
fn bash_git_status_is_not_dangerous() {
assert!(!command_might_be_dangerous(&vec_str(&[
"bash",
"-lc",
"git status",
])));
}
#[test]
fn sudo_git_reset_is_dangerous() {
assert!(command_might_be_dangerous(&vec_str(&[
"sudo", "git", "reset", "--hard",
])));
}
#[test]
fn usr_bin_git_is_dangerous() {
assert!(command_might_be_dangerous(&vec_str(&[
"/usr/bin/git",
"reset",
"--hard",
])));
}
#[test]
fn git_branch_delete_is_dangerous() {
assert!(command_might_be_dangerous(&vec_str(&[
"git", "branch", "-d", "feature",
])));
assert!(command_might_be_dangerous(&vec_str(&[
"git", "branch", "-D", "feature",
])));
assert!(command_might_be_dangerous(&vec_str(&[
"bash",
"-lc",
"git branch --delete feature",
])));
}
#[test]
fn git_branch_delete_with_stacked_short_flags_is_dangerous() {
assert!(command_might_be_dangerous(&vec_str(&[
"git", "branch", "-dv", "feature",
])));
assert!(command_might_be_dangerous(&vec_str(&[
"git", "branch", "-vd", "feature",
])));
assert!(command_might_be_dangerous(&vec_str(&[
"git", "branch", "-vD", "feature",
])));
assert!(command_might_be_dangerous(&vec_str(&[
"git", "branch", "-Dvv", "feature",
])));
}
#[test]
fn git_branch_delete_with_global_options_is_dangerous() {
assert!(command_might_be_dangerous(&vec_str(&[
"git", "-C", ".", "branch", "-d", "feature",
])));
assert!(command_might_be_dangerous(&vec_str(&[
"git",
"-c",
"color.ui=false",
"branch",
"-D",
"feature",
])));
assert!(command_might_be_dangerous(&vec_str(&[
"bash",
"-lc",
"git -C . branch -d feature",
])));
}
#[test]
fn git_checkout_reset_is_not_dangerous() {
// The first non-option token is "checkout", so later positional args
// like branch names must not be treated as subcommands.
assert!(!command_might_be_dangerous(&vec_str(&[
"git", "checkout", "reset",
])));
}
#[test]
fn git_push_force_is_dangerous() {
assert!(command_might_be_dangerous(&vec_str(&[
"git", "push", "--force", "origin", "main",
])));
assert!(command_might_be_dangerous(&vec_str(&[
"git", "push", "-f", "origin", "main",
])));
assert!(command_might_be_dangerous(&vec_str(&[
"git",
"-C",
".",
"push",
"--force-with-lease",
"origin",
"main",
])));
}
#[test]
fn git_push_plus_refspec_is_dangerous() {
assert!(command_might_be_dangerous(&vec_str(&[
"git", "push", "origin", "+main",
])));
assert!(command_might_be_dangerous(&vec_str(&[
"git",
"push",
"origin",
"+refs/heads/main:refs/heads/main",
])));
}
#[test]
fn git_push_delete_flag_is_dangerous() {
assert!(command_might_be_dangerous(&vec_str(&[
"git", "push", "--delete", "origin", "feature",
])));
assert!(command_might_be_dangerous(&vec_str(&[
"git", "push", "-d", "origin", "feature",
])));
}
#[test]
fn git_push_delete_refspec_is_dangerous() {
assert!(command_might_be_dangerous(&vec_str(&[
"git", "push", "origin", ":feature",
])));
assert!(command_might_be_dangerous(&vec_str(&[
"bash",
"-lc",
"git push origin :feature",
])));
}
#[test]
fn git_push_without_force_is_not_dangerous() {
assert!(!command_might_be_dangerous(&vec_str(&[
"git", "push", "origin", "main",
])));
}
#[test]
fn git_clean_force_is_dangerous_even_when_f_is_not_first_flag() {
assert!(command_might_be_dangerous(&vec_str(&[
"git", "clean", "-fdx",
])));
assert!(command_might_be_dangerous(&vec_str(&[
"git", "clean", "-xdf",
])));
assert!(command_might_be_dangerous(&vec_str(&[
"git", "clean", "--force",
])));
}
#[test]
fn rm_rf_is_dangerous() {
assert!(command_might_be_dangerous(&vec_str(&["rm", "-rf", "/"])));
}
#[test]
fn rm_f_is_dangerous() {
assert!(command_might_be_dangerous(&vec_str(&["rm", "-f", "/"])));
}
}

View File

@@ -0,0 +1,592 @@
use crate::bash::parse_shell_lc_plain_commands;
// Find the first matching git subcommand, skipping known global options that
// may appear before it (e.g., `-C`, `-c`, `--git-dir`).
// Implemented in `is_dangerous_command` and shared here.
use crate::command_safety::is_dangerous_command::find_git_subcommand;
use crate::command_safety::windows_safe_commands::is_safe_command_windows;
pub fn is_known_safe_command(command: &[String]) -> bool {
let command: Vec<String> = command
.iter()
.map(|s| {
if s == "zsh" {
"bash".to_string()
} else {
s.clone()
}
})
.collect();
if is_safe_command_windows(&command) {
return true;
}
if is_safe_to_call_with_exec(&command) {
return true;
}
// Support `bash -lc "..."` where the script consists solely of one or
// more "plain" commands (only bare words / quoted strings) combined with
// a conservative allowlist of shell operators that themselves do not
// introduce side effects ( "&&", "||", ";", and "|" ). If every
// individual command in the script is itself a knownsafe command, then
// the composite expression is considered safe.
if let Some(all_commands) = parse_shell_lc_plain_commands(&command)
&& !all_commands.is_empty()
&& all_commands
.iter()
.all(|cmd| is_safe_to_call_with_exec(cmd))
{
return true;
}
false
}
fn is_safe_to_call_with_exec(command: &[String]) -> bool {
let Some(cmd0) = command.first().map(String::as_str) else {
return false;
};
match std::path::Path::new(&cmd0)
.file_name()
.and_then(|osstr| osstr.to_str())
{
Some(cmd) if cfg!(target_os = "linux") && matches!(cmd, "numfmt" | "tac") => true,
#[rustfmt::skip]
Some(
"cat" |
"cd" |
"cut" |
"echo" |
"expr" |
"false" |
"grep" |
"head" |
"id" |
"ls" |
"nl" |
"paste" |
"pwd" |
"rev" |
"seq" |
"stat" |
"tail" |
"tr" |
"true" |
"uname" |
"uniq" |
"wc" |
"which" |
"whoami") => {
true
},
Some("base64") => {
const UNSAFE_BASE64_OPTIONS: &[&str] = &["-o", "--output"];
!command.iter().skip(1).any(|arg| {
UNSAFE_BASE64_OPTIONS.contains(&arg.as_str())
|| arg.starts_with("--output=")
|| (arg.starts_with("-o") && arg != "-o")
})
}
Some("find") => {
// Certain options to `find` can delete files, write to files, or
// execute arbitrary commands, so we cannot auto-approve the
// invocation of `find` in such cases.
#[rustfmt::skip]
const UNSAFE_FIND_OPTIONS: &[&str] = &[
// Options that can execute arbitrary commands.
"-exec", "-execdir", "-ok", "-okdir",
// Option that deletes matching files.
"-delete",
// Options that write pathnames to a file.
"-fls", "-fprint", "-fprint0", "-fprintf",
];
!command
.iter()
.any(|arg| UNSAFE_FIND_OPTIONS.contains(&arg.as_str()))
}
// Ripgrep
Some("rg") => {
const UNSAFE_RIPGREP_OPTIONS_WITH_ARGS: &[&str] = &[
// Takes an arbitrary command that is executed for each match.
"--pre",
// Takes a command that can be used to obtain the local hostname.
"--hostname-bin",
];
const UNSAFE_RIPGREP_OPTIONS_WITHOUT_ARGS: &[&str] = &[
// Calls out to other decompression tools, so do not auto-approve
// out of an abundance of caution.
"--search-zip",
"-z",
];
!command.iter().any(|arg| {
UNSAFE_RIPGREP_OPTIONS_WITHOUT_ARGS.contains(&arg.as_str())
|| UNSAFE_RIPGREP_OPTIONS_WITH_ARGS
.iter()
.any(|&opt| arg == opt || arg.starts_with(&format!("{opt}=")))
})
}
// Git
Some("git") => {
// Global config overrides like `-c core.pager=...` can force git
// to execute arbitrary external commands. With no sandboxing, we
// should always prompt in those cases.
if git_has_config_override_global_option(command) {
return false;
}
let Some((subcommand_idx, subcommand)) =
find_git_subcommand(command, &["status", "log", "diff", "show", "branch"])
else {
return false;
};
let subcommand_args = &command[subcommand_idx + 1..];
match subcommand {
"status" | "log" | "diff" | "show" => {
git_subcommand_args_are_read_only(subcommand_args)
}
"branch" => {
git_subcommand_args_are_read_only(subcommand_args)
&& git_branch_is_read_only(subcommand_args)
}
other => {
debug_assert!(false, "unexpected git subcommand from matcher: {other}");
false
}
}
}
// Special-case `sed -n {N|M,N}p`
Some("sed")
if {
command.len() <= 4
&& command.get(1).map(String::as_str) == Some("-n")
&& is_valid_sed_n_arg(command.get(2).map(String::as_str))
} =>
{
true
}
// ── anything else ─────────────────────────────────────────────────
_ => false,
}
}
// Treat `git branch` as safe only when the arguments clearly indicate
// a read-only query, not a branch mutation (create/rename/delete).
fn git_branch_is_read_only(branch_args: &[String]) -> bool {
if branch_args.is_empty() {
// `git branch` with no additional args lists branches.
return true;
}
let mut saw_read_only_flag = false;
for arg in branch_args.iter().map(String::as_str) {
match arg {
"--list" | "-l" | "--show-current" | "-a" | "--all" | "-r" | "--remotes" | "-v"
| "-vv" | "--verbose" => {
saw_read_only_flag = true;
}
_ if arg.starts_with("--format=") => {
saw_read_only_flag = true;
}
_ => {
// Any other flag or positional argument may create, rename, or delete branches.
return false;
}
}
}
saw_read_only_flag
}
fn git_has_config_override_global_option(command: &[String]) -> bool {
command.iter().map(String::as_str).any(|arg| {
matches!(arg, "-c" | "--config-env")
|| (arg.starts_with("-c") && arg.len() > 2)
|| arg.starts_with("--config-env=")
})
}
fn git_subcommand_args_are_read_only(args: &[String]) -> bool {
// Flags that can write to disk or execute external tools should never be
// auto-approved on an unsandboxed machine.
const UNSAFE_GIT_FLAGS: &[&str] = &[
"--output",
"--ext-diff",
"--textconv",
"--exec",
"--paginate",
];
!args.iter().map(String::as_str).any(|arg| {
UNSAFE_GIT_FLAGS.contains(&arg)
|| arg.starts_with("--output=")
|| arg.starts_with("--exec=")
})
}
// (bash parsing helpers implemented in crate::bash)
/* ----------------------------------------------------------
Example
---------------------------------------------------------- */
/// Returns true if `arg` matches /^(\d+,)?\d+p$/
fn is_valid_sed_n_arg(arg: Option<&str>) -> bool {
// unwrap or bail
let s = match arg {
Some(s) => s,
None => return false,
};
// must end with 'p', strip it
let core = match s.strip_suffix('p') {
Some(rest) => rest,
None => return false,
};
// split on ',' and ensure 1 or 2 numeric parts
let parts: Vec<&str> = core.split(',').collect();
match parts.as_slice() {
// single number, e.g. "10"
[num] => !num.is_empty() && num.chars().all(|c| c.is_ascii_digit()),
// two numbers, e.g. "1,5"
[a, b] => {
!a.is_empty()
&& !b.is_empty()
&& a.chars().all(|c| c.is_ascii_digit())
&& b.chars().all(|c| c.is_ascii_digit())
}
// anything else (more than one comma) is invalid
_ => false,
}
}
#[cfg(test)]
mod tests {
use super::*;
use std::string::ToString;
fn vec_str(args: &[&str]) -> Vec<String> {
args.iter().map(ToString::to_string).collect()
}
#[test]
fn known_safe_examples() {
assert!(is_safe_to_call_with_exec(&vec_str(&["ls"])));
assert!(is_safe_to_call_with_exec(&vec_str(&["git", "status"])));
assert!(is_safe_to_call_with_exec(&vec_str(&["git", "branch"])));
assert!(is_safe_to_call_with_exec(&vec_str(&[
"git",
"branch",
"--show-current"
])));
assert!(is_safe_to_call_with_exec(&vec_str(&["base64"])));
assert!(is_safe_to_call_with_exec(&vec_str(&[
"sed", "-n", "1,5p", "file.txt"
])));
assert!(is_safe_to_call_with_exec(&vec_str(&[
"nl",
"-nrz",
"Cargo.toml"
])));
// Safe `find` command (no unsafe options).
assert!(is_safe_to_call_with_exec(&vec_str(&[
"find", ".", "-name", "file.txt"
])));
if cfg!(target_os = "linux") {
assert!(is_safe_to_call_with_exec(&vec_str(&["numfmt", "1000"])));
assert!(is_safe_to_call_with_exec(&vec_str(&["tac", "Cargo.toml"])));
} else {
assert!(!is_safe_to_call_with_exec(&vec_str(&["numfmt", "1000"])));
assert!(!is_safe_to_call_with_exec(&vec_str(&["tac", "Cargo.toml"])));
}
}
#[test]
fn git_branch_mutating_flags_are_not_safe() {
assert!(!is_known_safe_command(&vec_str(&[
"git", "branch", "-d", "feature"
])));
assert!(!is_known_safe_command(&vec_str(&[
"git",
"branch",
"new-branch"
])));
}
#[test]
fn git_branch_global_options_respect_safety_rules() {
use pretty_assertions::assert_eq;
assert_eq!(
is_known_safe_command(&vec_str(&["git", "-C", ".", "branch", "--show-current"])),
true
);
assert_eq!(
is_known_safe_command(&vec_str(&["git", "-C", ".", "branch", "-d", "feature"])),
false
);
assert_eq!(
is_known_safe_command(&vec_str(&["bash", "-lc", "git -C . branch -d feature",])),
false
);
}
#[test]
fn git_first_positional_is_the_subcommand() {
// In git, the first non-option token is the subcommand. Later positional
// args (like branch names) must not be treated as subcommands.
assert!(!is_known_safe_command(&vec_str(&[
"git", "checkout", "status",
])));
}
#[test]
fn git_output_and_config_override_flags_are_not_safe() {
assert!(!is_known_safe_command(&vec_str(&[
"git",
"log",
"--output=/tmp/git-log-out-test",
"-n",
"1",
])));
assert!(!is_known_safe_command(&vec_str(&[
"git",
"diff",
"--output",
"/tmp/git-diff-out-test",
])));
assert!(!is_known_safe_command(&vec_str(&[
"git",
"show",
"--output=/tmp/git-show-out-test",
"HEAD",
])));
assert!(!is_known_safe_command(&vec_str(&[
"git",
"-c",
"core.pager=cat",
"log",
"-n",
"1",
])));
assert!(!is_known_safe_command(&vec_str(&[
"git",
"-ccore.pager=cat",
"status",
])));
}
#[test]
fn cargo_check_is_not_safe() {
assert!(!is_known_safe_command(&vec_str(&["cargo", "check"])));
}
#[test]
fn zsh_lc_safe_command_sequence() {
assert!(is_known_safe_command(&vec_str(&["zsh", "-lc", "ls"])));
}
#[test]
fn unknown_or_partial() {
assert!(!is_safe_to_call_with_exec(&vec_str(&["foo"])));
assert!(!is_safe_to_call_with_exec(&vec_str(&["git", "fetch"])));
assert!(!is_safe_to_call_with_exec(&vec_str(&[
"sed", "-n", "xp", "file.txt"
])));
// Unsafe `find` commands.
for args in [
vec_str(&["find", ".", "-name", "file.txt", "-exec", "rm", "{}", ";"]),
vec_str(&[
"find", ".", "-name", "*.py", "-execdir", "python3", "{}", ";",
]),
vec_str(&["find", ".", "-name", "file.txt", "-ok", "rm", "{}", ";"]),
vec_str(&["find", ".", "-name", "*.py", "-okdir", "python3", "{}", ";"]),
vec_str(&["find", ".", "-delete", "-name", "file.txt"]),
vec_str(&["find", ".", "-fls", "/etc/passwd"]),
vec_str(&["find", ".", "-fprint", "/etc/passwd"]),
vec_str(&["find", ".", "-fprint0", "/etc/passwd"]),
vec_str(&["find", ".", "-fprintf", "/root/suid.txt", "%#m %u %p\n"]),
] {
assert!(
!is_safe_to_call_with_exec(&args),
"expected {args:?} to be unsafe"
);
}
}
#[test]
fn base64_output_options_are_unsafe() {
for args in [
vec_str(&["base64", "-o", "out.bin"]),
vec_str(&["base64", "--output", "out.bin"]),
vec_str(&["base64", "--output=out.bin"]),
vec_str(&["base64", "-ob64.txt"]),
] {
assert!(
!is_safe_to_call_with_exec(&args),
"expected {args:?} to be considered unsafe due to output option"
);
}
}
#[test]
fn ripgrep_rules() {
// Safe ripgrep invocations none of the unsafe flags are present.
assert!(is_safe_to_call_with_exec(&vec_str(&[
"rg",
"Cargo.toml",
"-n"
])));
// Unsafe flags that do not take an argument (present verbatim).
for args in [
vec_str(&["rg", "--search-zip", "files"]),
vec_str(&["rg", "-z", "files"]),
] {
assert!(
!is_safe_to_call_with_exec(&args),
"expected {args:?} to be considered unsafe due to zip-search flag",
);
}
// Unsafe flags that expect a value, provided in both split and = forms.
for args in [
vec_str(&["rg", "--pre", "pwned", "files"]),
vec_str(&["rg", "--pre=pwned", "files"]),
vec_str(&["rg", "--hostname-bin", "pwned", "files"]),
vec_str(&["rg", "--hostname-bin=pwned", "files"]),
] {
assert!(
!is_safe_to_call_with_exec(&args),
"expected {args:?} to be considered unsafe due to external-command flag",
);
}
}
#[test]
fn windows_powershell_full_path_is_safe() {
if !cfg!(windows) {
// Windows only because on Linux path splitting doesn't handle `/` separators properly
return;
}
assert!(is_known_safe_command(&vec_str(&[
r"C:\Program Files\PowerShell\7\pwsh.exe",
"-Command",
"Get-Location",
])));
}
#[test]
fn bash_lc_safe_examples() {
assert!(is_known_safe_command(&vec_str(&["bash", "-lc", "ls"])));
assert!(is_known_safe_command(&vec_str(&["bash", "-lc", "ls -1"])));
assert!(is_known_safe_command(&vec_str(&[
"bash",
"-lc",
"git status"
])));
assert!(is_known_safe_command(&vec_str(&[
"bash",
"-lc",
"grep -R \"Cargo.toml\" -n"
])));
assert!(is_known_safe_command(&vec_str(&[
"bash",
"-lc",
"sed -n 1,5p file.txt"
])));
assert!(is_known_safe_command(&vec_str(&[
"bash",
"-lc",
"sed -n '1,5p' file.txt"
])));
assert!(is_known_safe_command(&vec_str(&[
"bash",
"-lc",
"find . -name file.txt"
])));
}
#[test]
fn bash_lc_safe_examples_with_operators() {
assert!(is_known_safe_command(&vec_str(&[
"bash",
"-lc",
"grep -R \"Cargo.toml\" -n || true"
])));
assert!(is_known_safe_command(&vec_str(&[
"bash",
"-lc",
"ls && pwd"
])));
assert!(is_known_safe_command(&vec_str(&[
"bash",
"-lc",
"echo 'hi' ; ls"
])));
assert!(is_known_safe_command(&vec_str(&[
"bash",
"-lc",
"ls | wc -l"
])));
}
#[test]
fn bash_lc_unsafe_examples() {
assert!(
!is_known_safe_command(&vec_str(&["bash", "-lc", "git", "status"])),
"Four arg version is not known to be safe."
);
assert!(
!is_known_safe_command(&vec_str(&["bash", "-lc", "'git status'"])),
"The extra quoting around 'git status' makes it a program named 'git status' and is therefore unsafe."
);
assert!(
!is_known_safe_command(&vec_str(&["bash", "-lc", "find . -name file.txt -delete"])),
"Unsafe find option should not be auto-approved."
);
// Disallowed because of unsafe command in sequence.
assert!(
!is_known_safe_command(&vec_str(&["bash", "-lc", "ls && rm -rf /"])),
"Sequence containing unsafe command must be rejected"
);
// Disallowed because of parentheses / subshell.
assert!(
!is_known_safe_command(&vec_str(&["bash", "-lc", "(ls)"])),
"Parentheses (subshell) are not provably safe with the current parser"
);
assert!(
!is_known_safe_command(&vec_str(&["bash", "-lc", "ls || (pwd && echo hi)"])),
"Nested parentheses are not provably safe with the current parser"
);
// Disallowed redirection.
assert!(
!is_known_safe_command(&vec_str(&["bash", "-lc", "ls > out.txt"])),
"> redirection should be rejected"
);
}
}

View File

@@ -0,0 +1,3 @@
pub mod is_dangerous_command;
pub mod is_safe_command;
pub mod windows_safe_commands;

View File

@@ -0,0 +1,201 @@
$ErrorActionPreference = 'Stop'
$payload = $env:CODEX_POWERSHELL_PAYLOAD
if ([string]::IsNullOrEmpty($payload)) {
Write-Output '{"status":"parse_failed"}'
exit 0
}
try {
$source =
[System.Text.Encoding]::Unicode.GetString(
[System.Convert]::FromBase64String($payload)
)
} catch {
Write-Output '{"status":"parse_failed"}'
exit 0
}
$tokens = $null
$errors = $null
$ast = $null
try {
$ast = [System.Management.Automation.Language.Parser]::ParseInput(
$source,
[ref]$tokens,
[ref]$errors
)
} catch {
Write-Output '{"status":"parse_failed"}'
exit 0
}
if ($errors.Count -gt 0) {
Write-Output '{"status":"parse_errors"}'
exit 0
}
function Convert-CommandElement {
param($element)
if ($element -is [System.Management.Automation.Language.StringConstantExpressionAst]) {
return @($element.Value)
}
if ($element -is [System.Management.Automation.Language.ExpandableStringExpressionAst]) {
if ($element.NestedExpressions.Count -gt 0) {
return $null
}
return @($element.Value)
}
if ($element -is [System.Management.Automation.Language.ConstantExpressionAst]) {
return @($element.Value.ToString())
}
if ($element -is [System.Management.Automation.Language.CommandParameterAst]) {
if ($element.Argument -eq $null) {
return @('-' + $element.ParameterName)
}
if ($element.Argument -is [System.Management.Automation.Language.StringConstantExpressionAst]) {
return @('-' + $element.ParameterName, $element.Argument.Value)
}
if ($element.Argument -is [System.Management.Automation.Language.ConstantExpressionAst]) {
return @('-' + $element.ParameterName, $element.Argument.Value.ToString())
}
return $null
}
return $null
}
function Convert-PipelineElement {
param($element)
if ($element -is [System.Management.Automation.Language.CommandAst]) {
if ($element.Redirections.Count -gt 0) {
return $null
}
if (
$element.InvocationOperator -ne $null -and
$element.InvocationOperator -ne [System.Management.Automation.Language.TokenKind]::Unknown
) {
return $null
}
$parts = @()
foreach ($commandElement in $element.CommandElements) {
$converted = Convert-CommandElement $commandElement
if ($converted -eq $null) {
return $null
}
$parts += $converted
}
return $parts
}
if ($element -is [System.Management.Automation.Language.CommandExpressionAst]) {
if ($element.Redirections.Count -gt 0) {
return $null
}
if ($element.Expression -is [System.Management.Automation.Language.ParenExpressionAst]) {
$innerPipeline = $element.Expression.Pipeline
if ($innerPipeline -and $innerPipeline.PipelineElements.Count -eq 1) {
return Convert-PipelineElement $innerPipeline.PipelineElements[0]
}
}
return $null
}
return $null
}
function Add-CommandsFromPipelineAst {
param($pipeline, $commands)
if ($pipeline.PipelineElements.Count -eq 0) {
return $false
}
foreach ($element in $pipeline.PipelineElements) {
$words = Convert-PipelineElement $element
if ($words -eq $null -or $words.Count -eq 0) {
return $false
}
$null = $commands.Add($words)
}
return $true
}
function Add-CommandsFromPipelineChain {
param($chain, $commands)
if (-not (Add-CommandsFromPipelineBase $chain.LhsPipelineChain $commands)) {
return $false
}
if (-not (Add-CommandsFromPipelineAst $chain.RhsPipeline $commands)) {
return $false
}
return $true
}
function Add-CommandsFromPipelineBase {
param($pipeline, $commands)
if ($pipeline -is [System.Management.Automation.Language.PipelineAst]) {
return Add-CommandsFromPipelineAst $pipeline $commands
}
if ($pipeline -is [System.Management.Automation.Language.PipelineChainAst]) {
return Add-CommandsFromPipelineChain $pipeline $commands
}
return $false
}
$commands = [System.Collections.ArrayList]::new()
foreach ($statement in $ast.EndBlock.Statements) {
if (-not (Add-CommandsFromPipelineBase $statement $commands)) {
$commands = $null
break
}
}
if ($commands -ne $null) {
$normalized = [System.Collections.ArrayList]::new()
foreach ($cmd in $commands) {
if ($cmd -is [string]) {
$null = $normalized.Add(@($cmd))
continue
}
if ($cmd -is [System.Array] -or $cmd -is [System.Collections.IEnumerable]) {
$null = $normalized.Add(@($cmd))
continue
}
$normalized = $null
break
}
$commands = $normalized
}
$result = if ($commands -eq $null) {
@{ status = 'unsupported' }
} else {
@{ status = 'ok'; commands = $commands }
}
,$result | ConvertTo-Json -Depth 3

View File

@@ -0,0 +1,755 @@
use std::path::Path;
use once_cell::sync::Lazy;
use regex::Regex;
use shlex::split as shlex_split;
use url::Url;
pub fn is_dangerous_command_windows(command: &[String]) -> bool {
// Prefer structured parsing for PowerShell/CMD so we can spot URL-bearing
// invocations of ShellExecute-style entry points before falling back to
// simple argv heuristics.
if is_dangerous_powershell(command) {
return true;
}
if is_dangerous_cmd(command) {
return true;
}
is_direct_gui_launch(command)
}
fn is_dangerous_powershell(command: &[String]) -> bool {
let Some((exe, rest)) = command.split_first() else {
return false;
};
if !is_powershell_executable(exe) {
return false;
}
// Parse the PowerShell invocation to get a flat token list we can scan for
// dangerous cmdlets/COM calls plus any URL-looking arguments. This is a
// best-effort shlex split of the script text, not a full PS parser.
let Some(parsed) = parse_powershell_invocation(rest) else {
return false;
};
let tokens_lc: Vec<String> = parsed
.tokens
.iter()
.map(|t| t.trim_matches('\'').trim_matches('"').to_ascii_lowercase())
.collect();
let has_url = args_have_url(&parsed.tokens);
if has_url
&& tokens_lc.iter().any(|t| {
matches!(
t.as_str(),
"start-process" | "start" | "saps" | "invoke-item" | "ii"
) || t.contains("start-process")
|| t.contains("invoke-item")
})
{
return true;
}
if has_url
&& tokens_lc
.iter()
.any(|t| t.contains("shellexecute") || t.contains("shell.application"))
{
return true;
}
if let Some(first) = tokens_lc.first() {
// Legacy ShellExecute path via url.dll
if first == "rundll32"
&& tokens_lc
.iter()
.any(|t| t.contains("url.dll,fileprotocolhandler"))
&& has_url
{
return true;
}
if first == "mshta" && has_url {
return true;
}
if is_browser_executable(first) && has_url {
return true;
}
if matches!(first.as_str(), "explorer" | "explorer.exe") && has_url {
return true;
}
}
// Check for force delete operations (e.g., Remove-Item -Force)
if has_force_delete_cmdlet(&tokens_lc) {
return true;
}
false
}
fn is_dangerous_cmd(command: &[String]) -> bool {
let Some((exe, rest)) = command.split_first() else {
return false;
};
let Some(base) = executable_basename(exe) else {
return false;
};
if base != "cmd" && base != "cmd.exe" {
return false;
}
let mut iter = rest.iter();
for arg in iter.by_ref() {
let lower = arg.to_ascii_lowercase();
match lower.as_str() {
"/c" | "/r" | "-c" => break,
_ if lower.starts_with('/') => continue,
// Unknown tokens before the command body => bail.
_ => return false,
}
}
let remaining: Vec<String> = iter.cloned().collect();
if remaining.is_empty() {
return false;
}
let cmd_tokens: Vec<String> = match remaining.as_slice() {
[only] => shlex_split(only).unwrap_or_else(|| vec![only.clone()]),
_ => remaining,
};
// Refine tokens by splitting concatenated CMD operators (e.g. "echo hi&del")
let tokens: Vec<String> = cmd_tokens
.into_iter()
.flat_map(|t| split_embedded_cmd_operators(&t))
.collect();
const CMD_SEPARATORS: &[&str] = &["&", "&&", "|", "||"];
tokens
.split(|t| CMD_SEPARATORS.contains(&t.as_str()))
.any(|segment| {
let Some(cmd) = segment.first() else {
return false;
};
// Classic `cmd /c ... start https://...` ShellExecute path.
if cmd.eq_ignore_ascii_case("start") && args_have_url(segment) {
return true;
}
// Force delete: del /f, erase /f
if (cmd.eq_ignore_ascii_case("del") || cmd.eq_ignore_ascii_case("erase"))
&& has_force_flag_cmd(segment)
{
return true;
}
// Recursive directory removal: rd /s /q, rmdir /s /q
if (cmd.eq_ignore_ascii_case("rd") || cmd.eq_ignore_ascii_case("rmdir"))
&& has_recursive_flag_cmd(segment)
&& has_quiet_flag_cmd(segment)
{
return true;
}
false
})
}
fn is_direct_gui_launch(command: &[String]) -> bool {
let Some((exe, rest)) = command.split_first() else {
return false;
};
let Some(base) = executable_basename(exe) else {
return false;
};
// Explorer/rundll32/mshta or direct browser exe with a URL anywhere in args.
if matches!(base.as_str(), "explorer" | "explorer.exe") && args_have_url(rest) {
return true;
}
if matches!(base.as_str(), "mshta" | "mshta.exe") && args_have_url(rest) {
return true;
}
if (base == "rundll32" || base == "rundll32.exe")
&& rest.iter().any(|t| {
t.to_ascii_lowercase()
.contains("url.dll,fileprotocolhandler")
})
&& args_have_url(rest)
{
return true;
}
if is_browser_executable(&base) && args_have_url(rest) {
return true;
}
false
}
fn split_embedded_cmd_operators(token: &str) -> Vec<String> {
// Split concatenated CMD operators so `echo hi&del` becomes `["echo hi", "&", "del"]`.
// Handles `&`, `&&`, `|`, `||`. Best-effort (CMD escaping is weird by nature).
let mut parts = Vec::new();
let mut start = 0;
let mut it = token.char_indices().peekable();
while let Some((i, ch)) = it.next() {
if ch == '&' || ch == '|' {
if i > start {
parts.push(token[start..i].to_string());
}
// Detect doubled operator: && or ||
let op_len = match it.peek() {
Some(&(j, next)) if next == ch => {
it.next(); // consume second char
(j + next.len_utf8()) - i
}
_ => ch.len_utf8(),
};
parts.push(token[i..i + op_len].to_string());
start = i + op_len;
}
}
if start < token.len() {
parts.push(token[start..].to_string());
}
parts.retain(|s| !s.trim().is_empty());
parts
}
fn has_force_delete_cmdlet(tokens: &[String]) -> bool {
const DELETE_CMDLETS: &[&str] = &["remove-item", "ri", "rm", "del", "erase", "rd", "rmdir"];
// Hard separators that end a command segment (so -Force must be in same segment)
const SEG_SEPS: &[char] = &[';', '|', '&', '\n', '\r', '\t'];
// Soft separators: punctuation that can stick to tokens (blocks, parens, brackets, commas, etc.)
const SOFT_SEPS: &[char] = &['{', '}', '(', ')', '[', ']', ',', ';'];
// Build rough command segments first
let mut segments: Vec<Vec<String>> = vec![Vec::new()];
for tok in tokens {
// If token itself contains segment separators, split it (best-effort)
let mut cur = String::new();
for ch in tok.chars() {
if SEG_SEPS.contains(&ch) {
let s = cur.trim();
if let Some(msg) = segments.last_mut()
&& !s.is_empty()
{
msg.push(s.to_string());
}
cur.clear();
if let Some(last) = segments.last()
&& !last.is_empty()
{
segments.push(Vec::new());
}
} else {
cur.push(ch);
}
}
let s = cur.trim();
if let Some(segment) = segments.last_mut()
&& !s.is_empty()
{
segment.push(s.to_string());
}
}
// Now, inside each segment, normalize tokens by splitting on soft punctuation
segments.into_iter().any(|seg| {
let atoms = seg
.iter()
.flat_map(|t| t.split(|c| SOFT_SEPS.contains(&c)))
.map(str::trim)
.filter(|s| !s.is_empty());
let mut has_delete = false;
let mut has_force = false;
for a in atoms {
if DELETE_CMDLETS.iter().any(|cmd| a.eq_ignore_ascii_case(cmd)) {
has_delete = true;
}
if a.eq_ignore_ascii_case("-force")
|| a.get(..7)
.is_some_and(|p| p.eq_ignore_ascii_case("-force:"))
{
has_force = true;
}
}
has_delete && has_force
})
}
/// Check for /f or /F flag in CMD del/erase arguments.
fn has_force_flag_cmd(args: &[String]) -> bool {
args.iter().any(|a| a.eq_ignore_ascii_case("/f"))
}
/// Check for /s or /S flag in CMD rd/rmdir arguments.
fn has_recursive_flag_cmd(args: &[String]) -> bool {
args.iter().any(|a| a.eq_ignore_ascii_case("/s"))
}
/// Check for /q or /Q flag in CMD rd/rmdir arguments.
fn has_quiet_flag_cmd(args: &[String]) -> bool {
args.iter().any(|a| a.eq_ignore_ascii_case("/q"))
}
fn args_have_url(args: &[String]) -> bool {
args.iter().any(|arg| looks_like_url(arg))
}
fn looks_like_url(token: &str) -> bool {
// Strip common PowerShell punctuation around inline URLs (quotes, parens, trailing semicolons).
// Capture the middle token after trimming leading quotes/parens/whitespace and trailing semicolons/closing parens.
static RE: Lazy<Option<Regex>> =
Lazy::new(|| Regex::new(r#"^[ "'\(\s]*([^\s"'\);]+)[\s;\)]*$"#).ok());
// If the token embeds a URL alongside other text (e.g., Start-Process('https://...'))
// as a single shlex token, grab the substring starting at the first URL prefix.
let urlish = token
.find("https://")
.or_else(|| token.find("http://"))
.map(|idx| &token[idx..])
.unwrap_or(token);
let candidate = RE
.as_ref()
.and_then(|re| re.captures(urlish))
.and_then(|caps| caps.get(1))
.map(|m| m.as_str())
.unwrap_or(urlish);
let Ok(url) = Url::parse(candidate) else {
return false;
};
matches!(url.scheme(), "http" | "https")
}
fn executable_basename(exe: &str) -> Option<String> {
Path::new(exe)
.file_name()
.and_then(|osstr| osstr.to_str())
.map(str::to_ascii_lowercase)
}
fn is_powershell_executable(exe: &str) -> bool {
matches!(
executable_basename(exe).as_deref(),
Some("powershell") | Some("powershell.exe") | Some("pwsh") | Some("pwsh.exe")
)
}
fn is_browser_executable(name: &str) -> bool {
matches!(
name,
"chrome"
| "chrome.exe"
| "msedge"
| "msedge.exe"
| "firefox"
| "firefox.exe"
| "iexplore"
| "iexplore.exe"
)
}
struct ParsedPowershell {
tokens: Vec<String>,
}
fn parse_powershell_invocation(args: &[String]) -> Option<ParsedPowershell> {
if args.is_empty() {
return None;
}
let mut idx = 0;
while idx < args.len() {
let arg = &args[idx];
let lower = arg.to_ascii_lowercase();
match lower.as_str() {
"-command" | "/command" | "-c" => {
let script = args.get(idx + 1)?;
if idx + 2 != args.len() {
return None;
}
let tokens = shlex_split(script)?;
return Some(ParsedPowershell { tokens });
}
_ if lower.starts_with("-command:") || lower.starts_with("/command:") => {
if idx + 1 != args.len() {
return None;
}
let (_, script) = arg.split_once(':')?;
let tokens = shlex_split(script)?;
return Some(ParsedPowershell { tokens });
}
"-nologo" | "-noprofile" | "-noninteractive" | "-mta" | "-sta" => {
idx += 1;
}
_ if lower.starts_with('-') => {
idx += 1;
}
_ => {
let rest = args[idx..].to_vec();
return Some(ParsedPowershell { tokens: rest });
}
}
}
None
}
#[cfg(test)]
mod tests {
use super::is_dangerous_command_windows;
fn vec_str(items: &[&str]) -> Vec<String> {
items.iter().map(std::string::ToString::to_string).collect()
}
#[test]
fn powershell_start_process_url_is_dangerous() {
assert!(is_dangerous_command_windows(&vec_str(&[
"powershell",
"-NoLogo",
"-Command",
"Start-Process 'https://example.com'"
])));
}
#[test]
fn powershell_start_process_url_with_trailing_semicolon_is_dangerous() {
assert!(is_dangerous_command_windows(&vec_str(&[
"powershell",
"-Command",
"Start-Process('https://example.com');"
])));
}
#[test]
fn powershell_start_process_local_is_not_flagged() {
assert!(!is_dangerous_command_windows(&vec_str(&[
"powershell",
"-Command",
"Start-Process notepad.exe"
])));
}
#[test]
fn cmd_start_with_url_is_dangerous() {
assert!(is_dangerous_command_windows(&vec_str(&[
"cmd",
"/c",
"start",
"https://example.com"
])));
}
#[test]
fn msedge_with_url_is_dangerous() {
assert!(is_dangerous_command_windows(&vec_str(&[
"msedge.exe",
"https://example.com"
])));
}
#[test]
fn explorer_with_directory_is_not_flagged() {
assert!(!is_dangerous_command_windows(&vec_str(&[
"explorer.exe",
"."
])));
}
// Force delete tests for PowerShell
#[test]
fn powershell_remove_item_force_is_dangerous() {
assert!(is_dangerous_command_windows(&vec_str(&[
"powershell",
"-Command",
"Remove-Item test -Force"
])));
}
#[test]
fn powershell_remove_item_recurse_force_is_dangerous() {
assert!(is_dangerous_command_windows(&vec_str(&[
"powershell",
"-Command",
"Remove-Item test -Recurse -Force"
])));
}
#[test]
fn powershell_ri_alias_force_is_dangerous() {
assert!(is_dangerous_command_windows(&vec_str(&[
"pwsh",
"-Command",
"ri test -Force"
])));
}
#[test]
fn powershell_remove_item_without_force_is_not_flagged() {
assert!(!is_dangerous_command_windows(&vec_str(&[
"powershell",
"-Command",
"Remove-Item test"
])));
}
// Force delete tests for CMD
#[test]
fn cmd_del_force_is_dangerous() {
assert!(is_dangerous_command_windows(&vec_str(&[
"cmd", "/c", "del", "/f", "test.txt"
])));
}
#[test]
fn cmd_erase_force_is_dangerous() {
assert!(is_dangerous_command_windows(&vec_str(&[
"cmd", "/c", "erase", "/f", "test.txt"
])));
}
#[test]
fn cmd_del_without_force_is_not_flagged() {
assert!(!is_dangerous_command_windows(&vec_str(&[
"cmd", "/c", "del", "test.txt"
])));
}
#[test]
fn cmd_rd_recursive_is_dangerous() {
assert!(is_dangerous_command_windows(&vec_str(&[
"cmd", "/c", "rd", "/s", "/q", "test"
])));
}
#[test]
fn cmd_rd_without_quiet_is_not_flagged() {
assert!(!is_dangerous_command_windows(&vec_str(&[
"cmd", "/c", "rd", "/s", "test"
])));
}
#[test]
fn cmd_rmdir_recursive_is_dangerous() {
assert!(is_dangerous_command_windows(&vec_str(&[
"cmd", "/c", "rmdir", "/s", "/q", "test"
])));
}
// Test exact scenario from issue #8567
#[test]
fn powershell_remove_item_path_recurse_force_is_dangerous() {
assert!(is_dangerous_command_windows(&vec_str(&[
"powershell",
"-Command",
"Remove-Item -Path 'test' -Recurse -Force"
])));
}
#[test]
fn powershell_remove_item_force_with_semicolon_is_dangerous() {
assert!(is_dangerous_command_windows(&vec_str(&[
"powershell",
"-Command",
"Remove-Item test -Force; Write-Host done"
])));
}
#[test]
fn powershell_remove_item_force_inside_block_is_dangerous() {
assert!(is_dangerous_command_windows(&vec_str(&[
"powershell",
"-Command",
"if ($true) { Remove-Item test -Force}"
])));
}
#[test]
fn powershell_remove_item_force_inside_brackets_is_dangerous() {
assert!(is_dangerous_command_windows(&vec_str(&[
"powershell",
"-Command",
"[void]( Remove-Item test -Force)]"
])));
}
#[test]
fn cmd_del_path_containing_f_is_not_flagged() {
assert!(!is_dangerous_command_windows(&vec_str(&[
"cmd",
"/c",
"del",
"C:/foo/bar.txt"
])));
}
#[test]
fn cmd_rd_path_containing_s_is_not_flagged() {
assert!(!is_dangerous_command_windows(&vec_str(&[
"cmd",
"/c",
"rd",
"C:/source"
])));
}
#[test]
fn cmd_bypass_chained_del_is_dangerous() {
assert!(is_dangerous_command_windows(&vec_str(&[
"cmd", "/c", "echo", "hello", "&", "del", "/f", "file.txt"
])));
}
#[test]
fn powershell_chained_no_space_is_dangerous() {
assert!(is_dangerous_command_windows(&vec_str(&[
"powershell",
"-Command",
"Write-Host hi;Remove-Item -Force C:\\tmp"
])));
}
#[test]
fn powershell_comma_separated_is_dangerous() {
assert!(is_dangerous_command_windows(&vec_str(&[
"powershell",
"-Command",
"del,-Force,C:\\foo"
])));
}
#[test]
fn cmd_echo_del_is_not_dangerous() {
assert!(!is_dangerous_command_windows(&vec_str(&[
"cmd", "/c", "echo", "del", "/f"
])));
}
#[test]
fn cmd_del_single_string_argument_is_dangerous() {
assert!(is_dangerous_command_windows(&vec_str(&[
"cmd",
"/c",
"del /f file.txt"
])));
}
#[test]
fn cmd_del_chained_single_string_argument_is_dangerous() {
assert!(is_dangerous_command_windows(&vec_str(&[
"cmd",
"/c",
"echo hello & del /f file.txt"
])));
}
#[test]
fn cmd_chained_no_space_del_is_dangerous() {
assert!(is_dangerous_command_windows(&vec_str(&[
"cmd",
"/c",
"echo hi&del /f file.txt"
])));
}
#[test]
fn cmd_chained_andand_no_space_del_is_dangerous() {
assert!(is_dangerous_command_windows(&vec_str(&[
"cmd",
"/c",
"echo hi&&del /f file.txt"
])));
}
#[test]
fn cmd_chained_oror_no_space_del_is_dangerous() {
assert!(is_dangerous_command_windows(&vec_str(&[
"cmd",
"/c",
"echo hi||del /f file.txt"
])));
}
#[test]
fn cmd_start_url_single_string_is_dangerous() {
assert!(is_dangerous_command_windows(&vec_str(&[
"cmd",
"/c",
"start https://example.com"
])));
}
#[test]
fn cmd_chained_no_space_rmdir_is_dangerous() {
assert!(is_dangerous_command_windows(&vec_str(&[
"cmd",
"/c",
"echo hi&rmdir /s /q testdir"
])));
}
#[test]
fn cmd_del_force_uppercase_flag_is_dangerous() {
assert!(is_dangerous_command_windows(&vec_str(&[
"cmd", "/c", "DEL", "/F", "file.txt"
])));
}
#[test]
fn cmdexe_r_del_force_is_dangerous() {
assert!(is_dangerous_command_windows(&vec_str(&[
"cmd.exe", "/r", "del", "/f", "file.txt"
])));
}
#[test]
fn cmd_start_quoted_url_single_string_is_dangerous() {
assert!(is_dangerous_command_windows(&vec_str(&[
"cmd",
"/c",
r#"start "https://example.com""#
])));
}
#[test]
fn cmd_start_title_then_url_is_dangerous() {
assert!(is_dangerous_command_windows(&vec_str(&[
"cmd",
"/c",
r#"start "" https://example.com"#
])));
}
#[test]
fn powershell_rm_alias_force_is_dangerous() {
assert!(is_dangerous_command_windows(&vec_str(&[
"powershell",
"-Command",
"rm test -Force"
])));
}
#[test]
fn powershell_benign_force_separate_command_is_not_dangerous() {
assert!(!is_dangerous_command_windows(&vec_str(&[
"powershell",
"-Command",
"Get-ChildItem -Force; Remove-Item test"
])));
}
}

View File

@@ -0,0 +1,623 @@
use base64::Engine;
use base64::engine::general_purpose::STANDARD as BASE64_STANDARD;
use serde::Deserialize;
use std::path::Path;
use std::process::Command;
use std::sync::LazyLock;
const POWERSHELL_PARSER_SCRIPT: &str = include_str!("powershell_parser.ps1");
/// On Windows, we conservatively allow only clearly read-only PowerShell invocations
/// that match a small safelist. Anything else (including direct CMD commands) is unsafe.
pub fn is_safe_command_windows(command: &[String]) -> bool {
if let Some(commands) = try_parse_powershell_command_sequence(command) {
commands
.iter()
.all(|cmd| is_safe_powershell_command(cmd.as_slice()))
} else {
// Only PowerShell invocations are allowed on Windows for now; anything else is unsafe.
false
}
}
/// Returns each command sequence if the invocation starts with a PowerShell binary.
/// For example, the tokens from `pwsh Get-ChildItem | Measure-Object` become two sequences.
fn try_parse_powershell_command_sequence(command: &[String]) -> Option<Vec<Vec<String>>> {
let (exe, rest) = command.split_first()?;
if is_powershell_executable(exe) {
parse_powershell_invocation(exe, rest)
} else {
None
}
}
/// Parses a PowerShell invocation into discrete command vectors, rejecting unsafe patterns.
fn parse_powershell_invocation(executable: &str, args: &[String]) -> Option<Vec<Vec<String>>> {
if args.is_empty() {
// Examples rejected here: "pwsh" and "powershell.exe" with no additional arguments.
return None;
}
let mut idx = 0;
while idx < args.len() {
let arg = &args[idx];
let lower = arg.to_ascii_lowercase();
match lower.as_str() {
"-command" | "/command" | "-c" => {
let script = args.get(idx + 1)?;
if idx + 2 != args.len() {
// Reject if there is more than one token representing the actual command.
// Examples rejected here: "pwsh -Command foo bar" and "powershell -c ls extra".
return None;
}
return parse_powershell_script(executable, script);
}
_ if lower.starts_with("-command:") || lower.starts_with("/command:") => {
if idx + 1 != args.len() {
// Reject if there are more tokens after the command itself.
// Examples rejected here: "pwsh -Command:dir C:\\" and "powershell /Command:dir C:\\" with trailing args.
return None;
}
let script = arg.split_once(':')?.1;
return parse_powershell_script(executable, script);
}
// Benign, no-arg flags we tolerate.
"-nologo" | "-noprofile" | "-noninteractive" | "-mta" | "-sta" => {
idx += 1;
continue;
}
// Explicitly forbidden/opaque or unnecessary for read-only operations.
"-encodedcommand" | "-ec" | "-file" | "/file" | "-windowstyle" | "-executionpolicy"
| "-workingdirectory" => {
// Examples rejected here: "pwsh -EncodedCommand ..." and "powershell -File script.ps1".
return None;
}
// Unknown switch → bail conservatively.
_ if lower.starts_with('-') => {
// Examples rejected here: "pwsh -UnknownFlag" and "powershell -foo bar".
return None;
}
// If we hit non-flag tokens, treat the remainder as a command sequence.
// This happens if powershell is invoked without -Command, e.g.
// ["pwsh", "-NoLogo", "git", "-c", "core.pager=cat", "status"]
_ => {
let script = join_arguments_as_script(&args[idx..]);
return parse_powershell_script(executable, &script);
}
}
}
// Examples rejected here: "pwsh" and "powershell.exe -NoLogo" without a script.
None
}
/// Tokenizes an inline PowerShell script and delegates to the command splitter.
/// Examples of when this is called: pwsh.exe -Command '<script>' or pwsh.exe -Command:<script>
fn parse_powershell_script(executable: &str, script: &str) -> Option<Vec<Vec<String>>> {
if let PowershellParseOutcome::Commands(commands) =
parse_with_powershell_ast(executable, script)
{
Some(commands)
} else {
None
}
}
/// Returns true when the executable name is one of the supported PowerShell binaries.
fn is_powershell_executable(exe: &str) -> bool {
let executable_name = Path::new(exe)
.file_name()
.and_then(|osstr| osstr.to_str())
.unwrap_or(exe)
.to_ascii_lowercase();
matches!(
executable_name.as_str(),
"powershell" | "powershell.exe" | "pwsh" | "pwsh.exe"
)
}
/// Attempts to parse PowerShell using the real PowerShell parser, returning every pipeline element
/// as a flat argv vector when possible. If parsing fails or the AST includes unsupported constructs,
/// we conservatively reject the command instead of trying to split it manually.
fn parse_with_powershell_ast(executable: &str, script: &str) -> PowershellParseOutcome {
let encoded_script = encode_powershell_base64(script);
let encoded_parser_script = encoded_parser_script();
match Command::new(executable)
.args([
"-NoLogo",
"-NoProfile",
"-NonInteractive",
"-EncodedCommand",
encoded_parser_script,
])
.env("CODEX_POWERSHELL_PAYLOAD", &encoded_script)
.output()
{
Ok(output) if output.status.success() => {
if let Ok(result) =
serde_json::from_slice::<PowershellParserOutput>(output.stdout.as_slice())
{
result.into_outcome()
} else {
PowershellParseOutcome::Failed
}
}
_ => PowershellParseOutcome::Failed,
}
}
fn encode_powershell_base64(script: &str) -> String {
let mut utf16 = Vec::with_capacity(script.len() * 2);
for unit in script.encode_utf16() {
utf16.extend_from_slice(&unit.to_le_bytes());
}
BASE64_STANDARD.encode(utf16)
}
fn encoded_parser_script() -> &'static str {
static ENCODED: LazyLock<String> =
LazyLock::new(|| encode_powershell_base64(POWERSHELL_PARSER_SCRIPT));
&ENCODED
}
#[derive(Deserialize)]
#[serde(deny_unknown_fields)]
struct PowershellParserOutput {
status: String,
commands: Option<Vec<Vec<String>>>,
}
impl PowershellParserOutput {
fn into_outcome(self) -> PowershellParseOutcome {
match self.status.as_str() {
"ok" => self
.commands
.filter(|commands| {
!commands.is_empty()
&& commands
.iter()
.all(|cmd| !cmd.is_empty() && cmd.iter().all(|word| !word.is_empty()))
})
.map(PowershellParseOutcome::Commands)
.unwrap_or(PowershellParseOutcome::Unsupported),
"unsupported" => PowershellParseOutcome::Unsupported,
_ => PowershellParseOutcome::Failed,
}
}
}
enum PowershellParseOutcome {
Commands(Vec<Vec<String>>),
Unsupported,
Failed,
}
fn join_arguments_as_script(args: &[String]) -> String {
let mut words = Vec::with_capacity(args.len());
if let Some((first, rest)) = args.split_first() {
words.push(first.clone());
for arg in rest {
words.push(quote_argument(arg));
}
}
words.join(" ")
}
fn quote_argument(arg: &str) -> String {
if arg.is_empty() {
return "''".to_string();
}
if arg.chars().all(|ch| !ch.is_whitespace()) {
return arg.to_string();
}
format!("'{}'", arg.replace('\'', "''"))
}
/// Validates that a parsed PowerShell command stays within our read-only safelist.
/// Everything before this is parsing, and rejecting things that make us feel uncomfortable.
fn is_safe_powershell_command(words: &[String]) -> bool {
if words.is_empty() {
// Examples rejected here: "pwsh -Command ''" and "pwsh -Command \"\"".
return false;
}
// Reject nested unsafe cmdlets inside parentheses or arguments
for w in words.iter() {
let inner = w
.trim_matches(|c| c == '(' || c == ')')
.trim_start_matches('-')
.to_ascii_lowercase();
if matches!(
inner.as_str(),
"set-content"
| "add-content"
| "out-file"
| "new-item"
| "remove-item"
| "move-item"
| "copy-item"
| "rename-item"
| "start-process"
| "stop-process"
) {
// Examples rejected here: "Write-Output (Set-Content foo6.txt 'abc')" and "Get-Content (New-Item bar.txt)".
return false;
}
}
let command = words[0]
.trim_matches(|c| c == '(' || c == ')')
.trim_start_matches('-')
.to_ascii_lowercase();
match command.as_str() {
"echo" | "write-output" | "write-host" => true, // (no redirection allowed)
"dir" | "ls" | "get-childitem" | "gci" => true,
"cat" | "type" | "gc" | "get-content" => true,
"select-string" | "sls" | "findstr" => true,
"measure-object" | "measure" => true,
"get-location" | "gl" | "pwd" => true,
"test-path" | "tp" => true,
"resolve-path" | "rvpa" => true,
"select-object" | "select" => true,
"get-item" => true,
"git" => is_safe_git_command(words),
"rg" => is_safe_ripgrep(words),
// Extra safety: explicitly prohibit common side-effecting cmdlets regardless of args.
"set-content" | "add-content" | "out-file" | "new-item" | "remove-item" | "move-item"
| "copy-item" | "rename-item" | "start-process" | "stop-process" => {
// Examples rejected here: "pwsh -Command 'Set-Content notes.txt data'" and "pwsh -Command 'Remove-Item temp.log'".
false
}
_ => {
// Examples rejected here: "pwsh -Command 'Invoke-WebRequest https://example.com'" and "pwsh -Command 'Start-Service Spooler'".
false
}
}
}
/// Checks that an `rg` invocation avoids options that can spawn arbitrary executables.
fn is_safe_ripgrep(words: &[String]) -> bool {
const UNSAFE_RIPGREP_OPTIONS_WITH_ARGS: &[&str] = &["--pre", "--hostname-bin"];
const UNSAFE_RIPGREP_OPTIONS_WITHOUT_ARGS: &[&str] = &["--search-zip", "-z"];
!words.iter().skip(1).any(|arg| {
let arg_lc = arg.to_ascii_lowercase();
// Examples rejected here: "pwsh -Command 'rg --pre cat pattern'" and "pwsh -Command 'rg --search-zip pattern'".
UNSAFE_RIPGREP_OPTIONS_WITHOUT_ARGS.contains(&arg_lc.as_str())
|| UNSAFE_RIPGREP_OPTIONS_WITH_ARGS
.iter()
.any(|opt| arg_lc == *opt || arg_lc.starts_with(&format!("{opt}=")))
})
}
/// Ensures a Git command sticks to whitelisted read-only subcommands and flags.
fn is_safe_git_command(words: &[String]) -> bool {
const SAFE_SUBCOMMANDS: &[&str] = &["status", "log", "show", "diff", "cat-file"];
let mut iter = words.iter().skip(1);
while let Some(arg) = iter.next() {
let arg_lc = arg.to_ascii_lowercase();
if arg.starts_with('-') {
if arg.eq_ignore_ascii_case("-c") || arg.eq_ignore_ascii_case("--config") {
if iter.next().is_none() {
// Examples rejected here: "pwsh -Command 'git -c'" and "pwsh -Command 'git --config'".
return false;
}
continue;
}
if arg_lc.starts_with("-c=")
|| arg_lc.starts_with("--config=")
|| arg_lc.starts_with("--git-dir=")
|| arg_lc.starts_with("--work-tree=")
{
continue;
}
if arg.eq_ignore_ascii_case("--git-dir") || arg.eq_ignore_ascii_case("--work-tree") {
if iter.next().is_none() {
// Examples rejected here: "pwsh -Command 'git --git-dir'" and "pwsh -Command 'git --work-tree'".
return false;
}
continue;
}
continue;
}
return SAFE_SUBCOMMANDS.contains(&arg_lc.as_str());
}
// Examples rejected here: "pwsh -Command 'git'" and "pwsh -Command 'git status --short | Remove-Item foo'".
false
}
#[cfg(all(test, windows))]
mod tests {
use super::*;
use crate::powershell::try_find_pwsh_executable_blocking;
use std::string::ToString;
/// Converts a slice of string literals into owned `String`s for the tests.
fn vec_str(args: &[&str]) -> Vec<String> {
args.iter().map(ToString::to_string).collect()
}
#[test]
fn recognizes_safe_powershell_wrappers() {
assert!(is_safe_command_windows(&vec_str(&[
"powershell.exe",
"-NoLogo",
"-Command",
"Get-ChildItem -Path .",
])));
assert!(is_safe_command_windows(&vec_str(&[
"powershell.exe",
"-NoProfile",
"-Command",
"git status",
])));
assert!(is_safe_command_windows(&vec_str(&[
"powershell.exe",
"Get-Content",
"Cargo.toml",
])));
// pwsh parity
if let Some(pwsh) = try_find_pwsh_executable_blocking() {
assert!(is_safe_command_windows(&[
pwsh.as_path().to_str().unwrap().into(),
"-NoProfile".to_string(),
"-Command".to_string(),
"Get-ChildItem".to_string(),
]));
}
}
#[test]
fn accepts_full_path_powershell_invocations() {
if !cfg!(windows) {
// Windows only because on Linux path splitting doesn't handle `/` separators properly
return;
}
if let Some(pwsh) = try_find_pwsh_executable_blocking() {
assert!(is_safe_command_windows(&[
pwsh.as_path().to_str().unwrap().into(),
"-NoProfile".to_string(),
"-Command".to_string(),
"Get-ChildItem -Path .".to_string(),
]));
}
assert!(is_safe_command_windows(&vec_str(&[
r"C:\Windows\System32\WindowsPowerShell\v1.0\powershell.exe",
"-Command",
"Get-Content Cargo.toml",
])));
}
#[test]
fn allows_read_only_pipelines_and_git_usage() {
let Some(pwsh) = try_find_pwsh_executable_blocking() else {
return;
};
let pwsh: String = pwsh.as_path().to_str().unwrap().into();
assert!(is_safe_command_windows(&[
pwsh.clone(),
"-NoLogo".to_string(),
"-NoProfile".to_string(),
"-Command".to_string(),
"rg --files-with-matches foo | Measure-Object | Select-Object -ExpandProperty Count"
.to_string()
]));
assert!(is_safe_command_windows(&[
pwsh.clone(),
"-NoLogo".to_string(),
"-NoProfile".to_string(),
"-Command".to_string(),
"Get-Content foo.rs | Select-Object -Skip 200".to_string()
]));
assert!(is_safe_command_windows(&[
pwsh.clone(),
"-NoLogo".to_string(),
"-NoProfile".to_string(),
"-Command".to_string(),
"git -c core.pager=cat show HEAD:foo.rs".to_string()
]));
assert!(is_safe_command_windows(&[
pwsh.clone(),
"-Command".to_string(),
"-git cat-file -p HEAD:foo.rs".to_string()
]));
assert!(is_safe_command_windows(&[
pwsh.clone(),
"-Command".to_string(),
"(Get-Content foo.rs -Raw)".to_string()
]));
assert!(is_safe_command_windows(&[
pwsh,
"-Command".to_string(),
"Get-Item foo.rs | Select-Object Length".to_string()
]));
}
#[test]
fn rejects_powershell_commands_with_side_effects() {
assert!(!is_safe_command_windows(&vec_str(&[
"powershell.exe",
"-NoLogo",
"-Command",
"Remove-Item foo.txt",
])));
assert!(!is_safe_command_windows(&vec_str(&[
"powershell.exe",
"-NoProfile",
"-Command",
"rg --pre cat",
])));
assert!(!is_safe_command_windows(&vec_str(&[
"powershell.exe",
"-Command",
"Set-Content foo.txt 'hello'",
])));
// Redirections are blocked
assert!(!is_safe_command_windows(&vec_str(&[
"powershell.exe",
"-Command",
"echo hi > out.txt",
])));
assert!(!is_safe_command_windows(&vec_str(&[
"powershell.exe",
"-Command",
"Get-Content x | Out-File y",
])));
assert!(!is_safe_command_windows(&vec_str(&[
"powershell.exe",
"-Command",
"Write-Output foo 2> err.txt",
])));
// Call operator is blocked
assert!(!is_safe_command_windows(&vec_str(&[
"powershell.exe",
"-Command",
"& Remove-Item foo",
])));
// Chained safe + unsafe must fail
assert!(!is_safe_command_windows(&vec_str(&[
"powershell.exe",
"-Command",
"Get-ChildItem; Remove-Item foo",
])));
// Nested unsafe cmdlet inside safe command must fail
assert!(!is_safe_command_windows(&vec_str(&[
"powershell.exe",
"-Command",
"Write-Output (Set-Content foo6.txt 'abc')",
])));
// Additional nested unsafe cmdlet examples must fail
assert!(!is_safe_command_windows(&vec_str(&[
"powershell.exe",
"-Command",
"Write-Host (Remove-Item foo.txt)",
])));
assert!(!is_safe_command_windows(&vec_str(&[
"powershell.exe",
"-Command",
"Get-Content (New-Item bar.txt)",
])));
// Unsafe @ expansion.
assert!(!is_safe_command_windows(&vec_str(&[
"powershell.exe",
"-Command",
"ls @(calc.exe)"
])));
// Unsupported constructs that the AST parser refuses (no fallback to manual splitting).
assert!(!is_safe_command_windows(&vec_str(&[
"powershell.exe",
"-Command",
"ls && pwd"
])));
// Sub-expressions are rejected even if they contain otherwise safe commands.
assert!(!is_safe_command_windows(&vec_str(&[
"powershell.exe",
"-Command",
"Write-Output $(Get-Content foo)"
])));
// Empty words from the parser (e.g. '') are rejected.
assert!(!is_safe_command_windows(&vec_str(&[
"powershell.exe",
"-Command",
"''"
])));
}
#[test]
fn accepts_constant_expression_arguments() {
assert!(is_safe_command_windows(&vec_str(&[
"powershell.exe",
"-Command",
"Get-Content 'foo bar'"
])));
assert!(is_safe_command_windows(&vec_str(&[
"powershell.exe",
"-Command",
"Get-Content \"foo bar\""
])));
}
#[test]
fn rejects_dynamic_arguments() {
assert!(!is_safe_command_windows(&vec_str(&[
"powershell.exe",
"-Command",
"Get-Content $foo"
])));
assert!(!is_safe_command_windows(&vec_str(&[
"powershell.exe",
"-Command",
"Write-Output \"foo $bar\""
])));
}
#[test]
fn uses_invoked_powershell_variant_for_parsing() {
if !cfg!(windows) {
return;
}
let chain = "pwd && ls";
assert!(
!is_safe_command_windows(&vec_str(&[
"powershell.exe",
"-NoProfile",
"-Command",
chain,
])),
"`{chain}` is not recognized by powershell.exe"
);
if let Some(pwsh) = try_find_pwsh_executable_blocking() {
assert!(
is_safe_command_windows(&[
pwsh.as_path().to_str().unwrap().into(),
"-NoProfile".to_string(),
"-Command".to_string(),
chain.to_string(),
]),
"`{chain}` should be considered safe to pwsh.exe"
);
}
}
}

View File

@@ -0,0 +1,11 @@
//! Command parsing and safety utilities shared across Codex crates.
mod shell_detect;
pub mod bash;
pub mod command_safety;
pub mod parse_command;
pub mod powershell;
pub use command_safety::is_dangerous_command;
pub use command_safety::is_safe_command;

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,204 @@
use std::path::PathBuf;
use codex_utils_absolute_path::AbsolutePathBuf;
use crate::shell_detect::ShellType;
use crate::shell_detect::detect_shell_type;
const POWERSHELL_FLAGS: &[&str] = &["-nologo", "-noprofile", "-command", "-c"];
/// Prefixed command for powershell shell calls to force UTF-8 console output.
pub const UTF8_OUTPUT_PREFIX: &str = "[Console]::OutputEncoding=[System.Text.Encoding]::UTF8;\n";
pub fn prefix_powershell_script_with_utf8(command: &[String]) -> Vec<String> {
let Some((_, script)) = extract_powershell_command(command) else {
return command.to_vec();
};
let trimmed = script.trim_start();
let script = if trimmed.starts_with(UTF8_OUTPUT_PREFIX) {
script.to_string()
} else {
format!("{UTF8_OUTPUT_PREFIX}{script}")
};
let mut command: Vec<String> = command[..(command.len() - 1)]
.iter()
.map(std::string::ToString::to_string)
.collect();
command.push(script);
command
}
/// Extract the PowerShell script body from an invocation such as:
///
/// - ["pwsh", "-NoProfile", "-Command", "Get-ChildItem -Recurse | Select-String foo"]
/// - ["powershell.exe", "-Command", "Write-Host hi"]
/// - ["powershell", "-NoLogo", "-NoProfile", "-Command", "...script..."]
///
/// Returns (`shell`, `script`) when the first arg is a PowerShell executable and a
/// `-Command` (or `-c`) flag is present followed by a script string.
pub fn extract_powershell_command(command: &[String]) -> Option<(&str, &str)> {
if command.len() < 3 {
return None;
}
let shell = &command[0];
if !matches!(
detect_shell_type(&PathBuf::from(shell)),
Some(ShellType::PowerShell)
) {
return None;
}
// Find the first occurrence of -Command (accept common short alias -c as well)
let mut i = 1usize;
while i + 1 < command.len() {
let flag = &command[i];
// Reject unknown flags
if !POWERSHELL_FLAGS.contains(&flag.to_ascii_lowercase().as_str()) {
return None;
}
if flag.eq_ignore_ascii_case("-Command") || flag.eq_ignore_ascii_case("-c") {
let script = &command[i + 1];
return Some((shell, script));
}
i += 1;
}
None
}
/// This function attempts to find a valid PowerShell executable on the system.
/// It first tries to find pwsh.exe, and if that fails, it tries to find
/// powershell.exe.
#[cfg(windows)]
#[allow(dead_code)]
pub(crate) fn try_find_powershellish_executable_blocking() -> Option<AbsolutePathBuf> {
if let Some(pwsh_path) = try_find_pwsh_executable_blocking() {
Some(pwsh_path)
} else {
try_find_powershell_executable_blocking()
}
}
/// This function attempts to find a powershell.exe executable on the system.
pub fn try_find_powershell_executable_blocking() -> Option<AbsolutePathBuf> {
try_find_powershellish_executable_in_path(&["powershell.exe"])
}
/// This function attempts to find a pwsh.exe executable on the system.
/// Note that pwsh.exe and powershell.exe are different executables:
///
/// - pwsh.exe is the cross-platform PowerShell Core (v6+) executable
/// - powershell.exe is the Windows PowerShell (v5.1 and earlier) executable
///
/// Further, while powershell.exe is included by default on Windows systems,
/// pwsh.exe must be installed separately by the user. And even when the user
/// has installed pwsh.exe, it may not be available in the system PATH, in which
/// case we attempt to locate it via other means.
pub fn try_find_pwsh_executable_blocking() -> Option<AbsolutePathBuf> {
if let Some(ps_home) = std::process::Command::new("cmd")
.args(["/C", "pwsh", "-NoProfile", "-Command", "$PSHOME"])
.output()
.ok()
.and_then(|out| {
if !out.status.success() {
return None;
}
let stdout = String::from_utf8_lossy(&out.stdout);
let trimmed = stdout.trim();
(!trimmed.is_empty()).then(|| trimmed.to_string())
})
{
let candidate = AbsolutePathBuf::resolve_path_against_base("pwsh.exe", &ps_home);
if let Ok(candidate_abs_path) = candidate
&& is_powershellish_executable_available(candidate_abs_path.as_path())
{
return Some(candidate_abs_path);
}
}
try_find_powershellish_executable_in_path(&["pwsh.exe"])
}
fn try_find_powershellish_executable_in_path(candidates: &[&str]) -> Option<AbsolutePathBuf> {
for candidate in candidates {
let Ok(resolved_path) = which::which(candidate) else {
continue;
};
if !is_powershellish_executable_available(&resolved_path) {
continue;
}
let Ok(abs_path) = AbsolutePathBuf::from_absolute_path(resolved_path) else {
continue;
};
return Some(abs_path);
}
None
}
fn is_powershellish_executable_available(powershell_or_pwsh_exe: &std::path::Path) -> bool {
// This test works for both powershell.exe and pwsh.exe.
std::process::Command::new(powershell_or_pwsh_exe)
.args(["-NoLogo", "-NoProfile", "-Command", "Write-Output ok"])
.output()
.map(|output| output.status.success())
.unwrap_or(false)
}
#[cfg(test)]
mod tests {
use super::extract_powershell_command;
#[test]
fn extracts_basic_powershell_command() {
let cmd = vec![
"powershell".to_string(),
"-Command".to_string(),
"Write-Host hi".to_string(),
];
let (_shell, script) = extract_powershell_command(&cmd).expect("extract");
assert_eq!(script, "Write-Host hi");
}
#[test]
fn extracts_lowercase_flags() {
let cmd = vec![
"powershell".to_string(),
"-nologo".to_string(),
"-command".to_string(),
"Write-Host hi".to_string(),
];
let (_shell, script) = extract_powershell_command(&cmd).expect("extract");
assert_eq!(script, "Write-Host hi");
}
#[test]
fn extracts_full_path_powershell_command() {
let command = if cfg!(windows) {
"C:\\windows\\System32\\WindowsPowerShell\\v1.0\\powershell.exe".to_string()
} else {
"/usr/local/bin/powershell.exe".to_string()
};
let cmd = vec![command, "-Command".to_string(), "Write-Host hi".to_string()];
let (_shell, script) = extract_powershell_command(&cmd).expect("extract");
assert_eq!(script, "Write-Host hi");
}
#[test]
fn extracts_with_noprofile_and_alias() {
let cmd = vec![
"pwsh".to_string(),
"-NoProfile".to_string(),
"-c".to_string(),
"Get-ChildItem | Select-String foo".to_string(),
];
let (_shell, script) = extract_powershell_command(&cmd).expect("extract");
assert_eq!(script, "Get-ChildItem | Select-String foo");
}
}

View File

@@ -0,0 +1,32 @@
use std::path::Path;
use std::path::PathBuf;
#[derive(Debug, PartialEq, Eq, Clone, Copy)]
pub(crate) enum ShellType {
Zsh,
Bash,
PowerShell,
Sh,
Cmd,
}
pub(crate) fn detect_shell_type(shell_path: &PathBuf) -> Option<ShellType> {
match shell_path.as_os_str().to_str() {
Some("zsh") => Some(ShellType::Zsh),
Some("sh") => Some(ShellType::Sh),
Some("cmd") => Some(ShellType::Cmd),
Some("bash") => Some(ShellType::Bash),
Some("pwsh") => Some(ShellType::PowerShell),
Some("powershell") => Some(ShellType::PowerShell),
_ => {
let shell_name = shell_path.file_stem();
if let Some(shell_name) = shell_name {
let shell_name_path = Path::new(shell_name);
if shell_name_path != Path::new(shell_path) {
return detect_shell_type(&shell_name_path.to_path_buf());
}
}
None
}
}
}