mirror of
https://github.com/openai/codex.git
synced 2026-05-01 20:02:05 +03:00
chore: rename codex-command to codex-shell-command (#11378)
This addresses some post-merge feedback on https://github.com/openai/codex/pull/11361: - crate rename - reuse `detect_shell_type()` utility
This commit is contained in:
565
codex-rs/shell-command/src/bash.rs
Normal file
565
codex-rs/shell-command/src/bash.rs
Normal file
@@ -0,0 +1,565 @@
|
||||
use std::path::PathBuf;
|
||||
|
||||
use tree_sitter::Node;
|
||||
use tree_sitter::Parser;
|
||||
use tree_sitter::Tree;
|
||||
use tree_sitter_bash::LANGUAGE as BASH;
|
||||
|
||||
use crate::shell_detect::ShellType;
|
||||
use crate::shell_detect::detect_shell_type;
|
||||
|
||||
/// Parse the provided bash source using tree-sitter-bash, returning a Tree on
|
||||
/// success or None if parsing failed.
|
||||
pub fn try_parse_shell(shell_lc_arg: &str) -> Option<Tree> {
|
||||
let lang = BASH.into();
|
||||
let mut parser = Parser::new();
|
||||
#[expect(clippy::expect_used)]
|
||||
parser.set_language(&lang).expect("load bash grammar");
|
||||
let old_tree: Option<&Tree> = None;
|
||||
parser.parse(shell_lc_arg, old_tree)
|
||||
}
|
||||
|
||||
/// Parse a script which may contain multiple simple commands joined only by
|
||||
/// the safe logical/pipe/sequencing operators: `&&`, `||`, `;`, `|`.
|
||||
///
|
||||
/// Returns `Some(Vec<command_words>)` if every command is a plain word‑only
|
||||
/// command and the parse tree does not contain disallowed constructs
|
||||
/// (parentheses, redirections, substitutions, control flow, etc.). Otherwise
|
||||
/// returns `None`.
|
||||
pub fn try_parse_word_only_commands_sequence(tree: &Tree, src: &str) -> Option<Vec<Vec<String>>> {
|
||||
if tree.root_node().has_error() {
|
||||
return None;
|
||||
}
|
||||
|
||||
// List of allowed (named) node kinds for a "word only commands sequence".
|
||||
// If we encounter a named node that is not in this list we reject.
|
||||
const ALLOWED_KINDS: &[&str] = &[
|
||||
// top level containers
|
||||
"program",
|
||||
"list",
|
||||
"pipeline",
|
||||
// commands & words
|
||||
"command",
|
||||
"command_name",
|
||||
"word",
|
||||
"string",
|
||||
"string_content",
|
||||
"raw_string",
|
||||
"number",
|
||||
"concatenation",
|
||||
];
|
||||
// Allow only safe punctuation / operator tokens; anything else causes reject.
|
||||
const ALLOWED_PUNCT_TOKENS: &[&str] = &["&&", "||", ";", "|", "\"", "'"];
|
||||
|
||||
let root = tree.root_node();
|
||||
let mut cursor = root.walk();
|
||||
let mut stack = vec![root];
|
||||
let mut command_nodes = Vec::new();
|
||||
while let Some(node) = stack.pop() {
|
||||
let kind = node.kind();
|
||||
if node.is_named() {
|
||||
if !ALLOWED_KINDS.contains(&kind) {
|
||||
return None;
|
||||
}
|
||||
if kind == "command" {
|
||||
command_nodes.push(node);
|
||||
}
|
||||
} else {
|
||||
// Reject any punctuation / operator tokens that are not explicitly allowed.
|
||||
if kind.chars().any(|c| "&;|".contains(c)) && !ALLOWED_PUNCT_TOKENS.contains(&kind) {
|
||||
return None;
|
||||
}
|
||||
if !(ALLOWED_PUNCT_TOKENS.contains(&kind) || kind.trim().is_empty()) {
|
||||
// If it's a quote token or operator it's allowed above; we also allow whitespace tokens.
|
||||
// Any other punctuation like parentheses, braces, redirects, backticks, etc are rejected.
|
||||
return None;
|
||||
}
|
||||
}
|
||||
for child in node.children(&mut cursor) {
|
||||
stack.push(child);
|
||||
}
|
||||
}
|
||||
|
||||
// Walk uses a stack (LIFO), so re-sort by position to restore source order.
|
||||
command_nodes.sort_by_key(Node::start_byte);
|
||||
|
||||
let mut commands = Vec::new();
|
||||
for node in command_nodes {
|
||||
if let Some(words) = parse_plain_command_from_node(node, src) {
|
||||
commands.push(words);
|
||||
} else {
|
||||
return None;
|
||||
}
|
||||
}
|
||||
Some(commands)
|
||||
}
|
||||
|
||||
pub fn extract_bash_command(command: &[String]) -> Option<(&str, &str)> {
|
||||
let [shell, flag, script] = command else {
|
||||
return None;
|
||||
};
|
||||
if !matches!(flag.as_str(), "-lc" | "-c")
|
||||
|| !matches!(
|
||||
detect_shell_type(&PathBuf::from(shell)),
|
||||
Some(ShellType::Zsh) | Some(ShellType::Bash) | Some(ShellType::Sh)
|
||||
)
|
||||
{
|
||||
return None;
|
||||
}
|
||||
Some((shell, script))
|
||||
}
|
||||
|
||||
/// Returns the sequence of plain commands within a `bash -lc "..."` or
|
||||
/// `zsh -lc "..."` invocation when the script only contains word-only commands
|
||||
/// joined by safe operators.
|
||||
pub fn parse_shell_lc_plain_commands(command: &[String]) -> Option<Vec<Vec<String>>> {
|
||||
let (_, script) = extract_bash_command(command)?;
|
||||
|
||||
let tree = try_parse_shell(script)?;
|
||||
try_parse_word_only_commands_sequence(&tree, script)
|
||||
}
|
||||
|
||||
/// Returns the parsed argv for a single shell command in a here-doc style
|
||||
/// script (`<<`), as long as the script contains exactly one command node.
|
||||
pub fn parse_shell_lc_single_command_prefix(command: &[String]) -> Option<Vec<String>> {
|
||||
let (_, script) = extract_bash_command(command)?;
|
||||
let tree = try_parse_shell(script)?;
|
||||
let root = tree.root_node();
|
||||
if root.has_error() {
|
||||
return None;
|
||||
}
|
||||
if !has_named_descendant_kind(root, "heredoc_redirect") {
|
||||
return None;
|
||||
}
|
||||
|
||||
let command_node = find_single_command_node(root)?;
|
||||
parse_heredoc_command_words(command_node, script)
|
||||
}
|
||||
|
||||
fn parse_plain_command_from_node(cmd: tree_sitter::Node, src: &str) -> Option<Vec<String>> {
|
||||
if cmd.kind() != "command" {
|
||||
return None;
|
||||
}
|
||||
let mut words = Vec::new();
|
||||
let mut cursor = cmd.walk();
|
||||
for child in cmd.named_children(&mut cursor) {
|
||||
match child.kind() {
|
||||
"command_name" => {
|
||||
let word_node = child.named_child(0)?;
|
||||
if word_node.kind() != "word" {
|
||||
return None;
|
||||
}
|
||||
words.push(word_node.utf8_text(src.as_bytes()).ok()?.to_owned());
|
||||
}
|
||||
"word" | "number" => {
|
||||
words.push(child.utf8_text(src.as_bytes()).ok()?.to_owned());
|
||||
}
|
||||
"string" => {
|
||||
let parsed = parse_double_quoted_string(child, src)?;
|
||||
words.push(parsed);
|
||||
}
|
||||
"raw_string" => {
|
||||
let parsed = parse_raw_string(child, src)?;
|
||||
words.push(parsed);
|
||||
}
|
||||
"concatenation" => {
|
||||
// Handle concatenated arguments like -g"*.py"
|
||||
let mut concatenated = String::new();
|
||||
let mut concat_cursor = child.walk();
|
||||
for part in child.named_children(&mut concat_cursor) {
|
||||
match part.kind() {
|
||||
"word" | "number" => {
|
||||
concatenated
|
||||
.push_str(part.utf8_text(src.as_bytes()).ok()?.to_owned().as_str());
|
||||
}
|
||||
"string" => {
|
||||
let parsed = parse_double_quoted_string(part, src)?;
|
||||
concatenated.push_str(&parsed);
|
||||
}
|
||||
"raw_string" => {
|
||||
let parsed = parse_raw_string(part, src)?;
|
||||
concatenated.push_str(&parsed);
|
||||
}
|
||||
_ => return None,
|
||||
}
|
||||
}
|
||||
if concatenated.is_empty() {
|
||||
return None;
|
||||
}
|
||||
words.push(concatenated);
|
||||
}
|
||||
_ => return None,
|
||||
}
|
||||
}
|
||||
Some(words)
|
||||
}
|
||||
|
||||
fn parse_heredoc_command_words(cmd: Node<'_>, src: &str) -> Option<Vec<String>> {
|
||||
if cmd.kind() != "command" {
|
||||
return None;
|
||||
}
|
||||
|
||||
let mut words = Vec::new();
|
||||
let mut cursor = cmd.walk();
|
||||
for child in cmd.named_children(&mut cursor) {
|
||||
match child.kind() {
|
||||
"command_name" => {
|
||||
let word_node = child.named_child(0)?;
|
||||
if !matches!(word_node.kind(), "word" | "number")
|
||||
|| !is_literal_word_or_number(word_node)
|
||||
{
|
||||
return None;
|
||||
}
|
||||
words.push(word_node.utf8_text(src.as_bytes()).ok()?.to_owned());
|
||||
}
|
||||
"word" | "number" => {
|
||||
if !is_literal_word_or_number(child) {
|
||||
return None;
|
||||
}
|
||||
words.push(child.utf8_text(src.as_bytes()).ok()?.to_owned());
|
||||
}
|
||||
// Allow shell constructs that attach IO to a single command without
|
||||
// changing argv matching semantics for the executable prefix.
|
||||
"variable_assignment" | "comment" => {}
|
||||
kind if is_allowed_heredoc_attachment_kind(kind) => {}
|
||||
_ => return None,
|
||||
}
|
||||
}
|
||||
|
||||
if words.is_empty() { None } else { Some(words) }
|
||||
}
|
||||
|
||||
fn is_literal_word_or_number(node: Node<'_>) -> bool {
|
||||
if !matches!(node.kind(), "word" | "number") {
|
||||
return false;
|
||||
}
|
||||
let mut cursor = node.walk();
|
||||
node.named_children(&mut cursor).next().is_none()
|
||||
}
|
||||
|
||||
fn has_named_descendant_kind(node: Node<'_>, kind: &str) -> bool {
|
||||
let mut stack = vec![node];
|
||||
while let Some(current) = stack.pop() {
|
||||
if current.kind() == kind {
|
||||
return true;
|
||||
}
|
||||
let mut cursor = current.walk();
|
||||
for child in current.named_children(&mut cursor) {
|
||||
stack.push(child);
|
||||
}
|
||||
}
|
||||
false
|
||||
}
|
||||
|
||||
/// Reports whether `kind` is one of the redirect / here-doc node kinds that
/// `parse_heredoc_command_words` skips over.
fn is_allowed_heredoc_attachment_kind(kind: &str) -> bool {
    const ATTACHMENT_KINDS: [&str; 6] = [
        "heredoc_body",
        "simple_heredoc_body",
        "heredoc_redirect",
        "herestring_redirect",
        "file_redirect",
        "redirected_statement",
    ];
    ATTACHMENT_KINDS.contains(&kind)
}
|
||||
|
||||
/// Returns the only `command` node reachable from `root` through named
/// children.
///
/// Yields `None` when there is no such node or when more than one exists.
fn find_single_command_node(root: Node<'_>) -> Option<Node<'_>> {
    let mut pending = vec![root];
    let mut found = None;
    while let Some(current) = pending.pop() {
        // `replace` hands back the previous value, so `Some` here means this
        // is a second command node.
        if current.kind() == "command" && found.replace(current).is_some() {
            return None;
        }

        let mut cursor = current.walk();
        pending.extend(current.named_children(&mut cursor));
    }
    found
}
|
||||
|
||||
fn parse_double_quoted_string(node: Node, src: &str) -> Option<String> {
|
||||
if node.kind() != "string" {
|
||||
return None;
|
||||
}
|
||||
|
||||
let mut cursor = node.walk();
|
||||
for part in node.named_children(&mut cursor) {
|
||||
if part.kind() != "string_content" {
|
||||
return None;
|
||||
}
|
||||
}
|
||||
let raw = node.utf8_text(src.as_bytes()).ok()?;
|
||||
let stripped = raw
|
||||
.strip_prefix('"')
|
||||
.and_then(|text| text.strip_suffix('"'))?;
|
||||
Some(stripped.to_string())
|
||||
}
|
||||
|
||||
fn parse_raw_string(node: Node, src: &str) -> Option<String> {
|
||||
if node.kind() != "raw_string" {
|
||||
return None;
|
||||
}
|
||||
|
||||
let raw_string = node.utf8_text(src.as_bytes()).ok()?;
|
||||
let stripped = raw_string
|
||||
.strip_prefix('\'')
|
||||
.and_then(|s| s.strip_suffix('\''));
|
||||
stripped.map(str::to_owned)
|
||||
}
|
||||
|
||||
#[cfg(test)]
mod tests {
    use super::*;
    use pretty_assertions::assert_eq;

    /// Parses `src` and extracts its word-only command sequence.
    fn parse_seq(src: &str) -> Option<Vec<Vec<String>>> {
        try_parse_shell(src).and_then(|tree| try_parse_word_only_commands_sequence(&tree, src))
    }

    /// Builds an owned argv from string literals.
    fn argv(words: &[&str]) -> Vec<String> {
        words.iter().map(|word| (*word).to_owned()).collect()
    }

    /// Builds a `[shell, "-lc", script]` invocation.
    fn shell_lc(shell: &str, script: &str) -> Vec<String> {
        argv(&[shell, "-lc", script])
    }

    /// Asserts that every script in `scripts` is rejected by `parse_seq`.
    fn assert_all_rejected(scripts: &[&str]) {
        for script in scripts {
            assert!(parse_seq(script).is_none(), "expected rejection: {script}");
        }
    }

    #[test]
    fn accepts_single_simple_command() {
        assert_eq!(parse_seq("ls -1"), Some(vec![argv(&["ls", "-1"])]));
    }

    #[test]
    fn accepts_multiple_commands_with_allowed_operators() {
        let expected = vec![
            argv(&["ls"]),
            argv(&["pwd"]),
            argv(&["echo", "hi there"]),
            argv(&["wc", "-l"]),
        ];
        assert_eq!(
            parse_seq("ls && pwd; echo 'hi there' | wc -l"),
            Some(expected)
        );
    }

    #[test]
    fn extracts_double_and_single_quoted_strings() {
        assert_eq!(
            parse_seq("echo \"hello world\""),
            Some(vec![argv(&["echo", "hello world"])])
        );
        assert_eq!(
            parse_seq("echo 'hi there'"),
            Some(vec![argv(&["echo", "hi there"])])
        );
    }

    #[test]
    fn accepts_double_quoted_strings_with_newlines() {
        assert_eq!(
            parse_seq("git commit -m \"line1\nline2\""),
            Some(vec![argv(&["git", "commit", "-m", "line1\nline2"])])
        );
    }

    #[test]
    fn accepts_mixed_quote_concatenation() {
        let expected = Some(vec![argv(&["echo", "/usr/local/bin"])]);
        assert_eq!(parse_seq(r#"echo "/usr"'/'"local"/bin"#), expected);
        assert_eq!(parse_seq(r#"echo '/usr'"/"'local'/bin"#), expected);
    }

    #[test]
    fn rejects_double_quoted_strings_with_expansions() {
        assert_all_rejected(&[r#"echo "hi ${USER}""#, r#"echo "$HOME""#]);
    }

    #[test]
    fn accepts_numbers_as_words() {
        assert_eq!(
            parse_seq("echo 123 456"),
            Some(vec![argv(&["echo", "123", "456"])])
        );
    }

    #[test]
    fn rejects_parentheses_and_subshells() {
        assert_all_rejected(&["(ls)", "ls || (pwd && echo hi)"]);
    }

    #[test]
    fn rejects_redirections_and_unsupported_operators() {
        assert_all_rejected(&["ls > out.txt", "echo hi & echo bye"]);
    }

    #[test]
    fn rejects_command_and_process_substitutions_and_expansions() {
        assert_all_rejected(&[
            "echo $(pwd)",
            "echo `pwd`",
            "echo $HOME",
            "echo \"hi $USER\"",
        ]);
    }

    #[test]
    fn rejects_variable_assignment_prefix() {
        assert_all_rejected(&["FOO=bar ls"]);
    }

    #[test]
    fn rejects_trailing_operator_parse_error() {
        assert_all_rejected(&["ls &&"]);
    }

    #[test]
    fn parse_zsh_lc_plain_commands() {
        let command = shell_lc("zsh", "ls");
        assert_eq!(
            parse_shell_lc_plain_commands(&command),
            Some(vec![argv(&["ls"])])
        );
    }

    #[test]
    fn accepts_concatenated_flag_and_value() {
        // Test case: -g"*.py" (flag directly concatenated with quoted value)
        assert_eq!(
            parse_seq("rg -n \"foo\" -g\"*.py\""),
            Some(vec![argv(&["rg", "-n", "foo", "-g*.py"])])
        );
    }

    #[test]
    fn accepts_concatenated_flag_with_single_quotes() {
        assert_eq!(
            parse_seq("grep -n 'pattern' -g'*.txt'"),
            Some(vec![argv(&["grep", "-n", "pattern", "-g*.txt"])])
        );
    }

    #[test]
    fn rejects_concatenation_with_variable_substitution() {
        // Environment variables in concatenated strings should be rejected
        assert_all_rejected(&["rg -g\"$VAR\" pattern", "rg -g\"${VAR}\" pattern"]);
    }

    #[test]
    fn rejects_concatenation_with_command_substitution() {
        // Command substitution in concatenated strings should be rejected
        assert_all_rejected(&[
            "rg -g\"$(pwd)\" pattern",
            "rg -g\"$(echo '*.py')\" pattern",
        ]);
    }

    #[test]
    fn parse_shell_lc_single_command_prefix_supports_heredoc() {
        let quoted_delimiter = shell_lc("zsh", "python3 <<'PY'\nprint('hello')\nPY");
        assert_eq!(
            parse_shell_lc_single_command_prefix(&quoted_delimiter),
            Some(argv(&["python3"]))
        );

        let unquoted_delimiter = shell_lc("zsh", "python3 << PY\nprint('hello')\nPY");
        assert_eq!(
            parse_shell_lc_single_command_prefix(&unquoted_delimiter),
            Some(argv(&["python3"]))
        );
    }

    #[test]
    fn parse_shell_lc_single_command_prefix_rejects_multi_command_scripts() {
        let command = shell_lc("bash", "python3 <<'PY'\nprint('hello')\nPY\necho done");
        assert_eq!(parse_shell_lc_single_command_prefix(&command), None);
    }

    #[test]
    fn parse_shell_lc_single_command_prefix_rejects_non_heredoc_redirects() {
        let command = shell_lc("bash", "echo hello > /tmp/out.txt");
        assert_eq!(parse_shell_lc_single_command_prefix(&command), None);
    }

    #[test]
    fn parse_shell_lc_single_command_prefix_accepts_heredoc_with_extra_redirect() {
        let command = shell_lc("bash", "python3 <<'PY' > /tmp/out.txt\nprint('hello')\nPY");
        assert_eq!(
            parse_shell_lc_single_command_prefix(&command),
            Some(argv(&["python3"]))
        );
    }

    #[test]
    fn parse_shell_lc_single_command_prefix_rejects_herestring_with_substitution() {
        let command = shell_lc("bash", r#"python3 <<< "$(rm -rf /)""#);
        assert_eq!(parse_shell_lc_single_command_prefix(&command), None);
    }

    #[test]
    fn parse_shell_lc_single_command_prefix_rejects_arithmetic_shift_non_heredoc_script() {
        let command = shell_lc("bash", "echo $((1<<2))");
        assert_eq!(parse_shell_lc_single_command_prefix(&command), None);
    }

    #[test]
    fn parse_shell_lc_single_command_prefix_rejects_heredoc_command_with_word_expansion() {
        let command = shell_lc("bash", "python3 $((1<<2)) <<'PY'\nprint('hello')\nPY");
        assert_eq!(parse_shell_lc_single_command_prefix(&command), None);
    }
}
|
||||
@@ -0,0 +1,382 @@
|
||||
use crate::bash::parse_shell_lc_plain_commands;
|
||||
#[cfg(windows)]
|
||||
#[path = "windows_dangerous_commands.rs"]
|
||||
mod windows_dangerous_commands;
|
||||
|
||||
pub fn command_might_be_dangerous(command: &[String]) -> bool {
|
||||
#[cfg(windows)]
|
||||
{
|
||||
if windows_dangerous_commands::is_dangerous_command_windows(command) {
|
||||
return true;
|
||||
}
|
||||
}
|
||||
|
||||
if is_dangerous_to_call_with_exec(command) {
|
||||
return true;
|
||||
}
|
||||
|
||||
// Support `bash -lc "<script>"` where the any part of the script might contain a dangerous command.
|
||||
if let Some(all_commands) = parse_shell_lc_plain_commands(command)
|
||||
&& all_commands
|
||||
.iter()
|
||||
.any(|cmd| is_dangerous_to_call_with_exec(cmd))
|
||||
{
|
||||
return true;
|
||||
}
|
||||
|
||||
false
|
||||
}
|
||||
|
||||
/// Reports whether `arg` is exactly one of the git global options whose value
/// is supplied as the following, separate argument.
fn is_git_global_option_with_value(arg: &str) -> bool {
    const VALUE_TAKING_OPTIONS: [&str; 8] = [
        "-C",
        "-c",
        "--config-env",
        "--exec-path",
        "--git-dir",
        "--namespace",
        "--super-prefix",
        "--work-tree",
    ];
    VALUE_TAKING_OPTIONS.contains(&arg)
}
|
||||
|
||||
/// Reports whether `arg` is a git global option that carries its value in the
/// same token: either `--name=value` for the known long options, or `-C` /
/// `-c` immediately followed by at least one more byte.
fn is_git_global_option_with_inline_value(arg: &str) -> bool {
    const INLINE_PREFIXES: [&str; 6] = [
        "--config-env=",
        "--exec-path=",
        "--git-dir=",
        "--namespace=",
        "--super-prefix=",
        "--work-tree=",
    ];

    let long_form = INLINE_PREFIXES
        .iter()
        .any(|prefix| arg.starts_with(*prefix));
    // A bare `-C` / `-c` takes its value from the next argument instead.
    let short_form = arg.len() > 2 && (arg.starts_with("-C") || arg.starts_with("-c"));

    long_form || short_form
}
|
||||
|
||||
/// Find the first matching git subcommand, skipping known global options that
|
||||
/// may appear before it (e.g., `-C`, `-c`, `--git-dir`).
|
||||
///
|
||||
/// Shared with `is_safe_command` to avoid git-global-option bypasses.
|
||||
pub(crate) fn find_git_subcommand<'a>(
|
||||
command: &'a [String],
|
||||
subcommands: &[&str],
|
||||
) -> Option<(usize, &'a str)> {
|
||||
let cmd0 = command.first().map(String::as_str)?;
|
||||
if !cmd0.ends_with("git") {
|
||||
return None;
|
||||
}
|
||||
|
||||
let mut skip_next = false;
|
||||
for (idx, arg) in command.iter().enumerate().skip(1) {
|
||||
if skip_next {
|
||||
skip_next = false;
|
||||
continue;
|
||||
}
|
||||
|
||||
let arg = arg.as_str();
|
||||
|
||||
if is_git_global_option_with_inline_value(arg) {
|
||||
continue;
|
||||
}
|
||||
|
||||
if is_git_global_option_with_value(arg) {
|
||||
skip_next = true;
|
||||
continue;
|
||||
}
|
||||
|
||||
if arg == "--" || arg.starts_with('-') {
|
||||
continue;
|
||||
}
|
||||
|
||||
if subcommands.contains(&arg) {
|
||||
return Some((idx, arg));
|
||||
}
|
||||
|
||||
// In git, the first non-option token is the subcommand. If it isn't
|
||||
// one of the subcommands we're looking for, we must stop scanning to
|
||||
// avoid misclassifying later positional args (e.g., branch names).
|
||||
return None;
|
||||
}
|
||||
|
||||
None
|
||||
}
|
||||
|
||||
fn is_dangerous_to_call_with_exec(command: &[String]) -> bool {
|
||||
let cmd0 = command.first().map(String::as_str);
|
||||
|
||||
match cmd0 {
|
||||
Some(cmd) if cmd.ends_with("git") => {
|
||||
let Some((subcommand_idx, subcommand)) =
|
||||
find_git_subcommand(command, &["reset", "rm", "branch", "push", "clean"])
|
||||
else {
|
||||
return false;
|
||||
};
|
||||
|
||||
match subcommand {
|
||||
"reset" | "rm" => true,
|
||||
"branch" => git_branch_is_delete(&command[subcommand_idx + 1..]),
|
||||
"push" => git_push_is_dangerous(&command[subcommand_idx + 1..]),
|
||||
"clean" => git_clean_is_force(&command[subcommand_idx + 1..]),
|
||||
other => {
|
||||
debug_assert!(false, "unexpected git subcommand from matcher: {other}");
|
||||
false
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
Some("rm") => matches!(command.get(1).map(String::as_str), Some("-f" | "-rf")),
|
||||
|
||||
// for sudo <cmd> simply do the check for <cmd>
|
||||
Some("sudo") => is_dangerous_to_call_with_exec(&command[1..]),
|
||||
|
||||
// ── anything else ─────────────────────────────────────────────────
|
||||
_ => false,
|
||||
}
|
||||
}
|
||||
|
||||
fn git_branch_is_delete(branch_args: &[String]) -> bool {
|
||||
// Git allows stacking short flags (for example, `-dv` or `-vd`). Treat any
|
||||
// short-flag group containing `d`/`D` as a delete flag.
|
||||
branch_args.iter().map(String::as_str).any(|arg| {
|
||||
matches!(arg, "-d" | "-D" | "--delete")
|
||||
|| arg.starts_with("--delete=")
|
||||
|| short_flag_group_contains(arg, 'd')
|
||||
|| short_flag_group_contains(arg, 'D')
|
||||
})
|
||||
}
|
||||
|
||||
/// Reports whether `arg` is a single-dash flag group (such as `-vd`) whose
/// characters after the dash include `target`.
///
/// Arguments that do not start with `-`, or that start with `--`, never
/// match.
fn short_flag_group_contains(arg: &str, target: char) -> bool {
    match arg.strip_prefix('-') {
        Some(flags) if !flags.starts_with('-') => flags.contains(target),
        _ => false,
    }
}
|
||||
|
||||
fn git_push_is_dangerous(push_args: &[String]) -> bool {
|
||||
push_args.iter().map(String::as_str).any(|arg| {
|
||||
matches!(
|
||||
arg,
|
||||
"--force" | "--force-with-lease" | "--force-if-includes" | "--delete" | "-f" | "-d"
|
||||
) || arg.starts_with("--force-with-lease=")
|
||||
|| arg.starts_with("--force-if-includes=")
|
||||
|| arg.starts_with("--delete=")
|
||||
|| short_flag_group_contains(arg, 'f')
|
||||
|| short_flag_group_contains(arg, 'd')
|
||||
|| git_push_refspec_is_dangerous(arg)
|
||||
})
|
||||
}
|
||||
|
||||
/// Reports whether `arg` is a refspec that forces an update (`+<refspec>`) or
/// deletes a remote ref (`:<dst>`).
///
/// A lone `+` or `:` does not count.
fn git_push_refspec_is_dangerous(arg: &str) -> bool {
    arg.len() > 1 && arg.starts_with(['+', ':'])
}
|
||||
|
||||
fn git_clean_is_force(clean_args: &[String]) -> bool {
|
||||
clean_args.iter().map(String::as_str).any(|arg| {
|
||||
matches!(arg, "--force" | "-f")
|
||||
|| arg.starts_with("--force=")
|
||||
|| short_flag_group_contains(arg, 'f')
|
||||
})
|
||||
}
|
||||
|
||||
#[cfg(test)]
mod tests {
    use super::*;

    /// Runs `command_might_be_dangerous` on an argv given as string literals.
    fn dangerous(argv: &[&str]) -> bool {
        let owned: Vec<String> = argv.iter().map(|arg| (*arg).to_owned()).collect();
        command_might_be_dangerous(&owned)
    }

    #[test]
    fn git_reset_is_dangerous() {
        assert!(dangerous(&["git", "reset"]));
    }

    #[test]
    fn bash_git_reset_is_dangerous() {
        assert!(dangerous(&["bash", "-lc", "git reset --hard"]));
    }

    #[test]
    fn zsh_git_reset_is_dangerous() {
        assert!(dangerous(&["zsh", "-lc", "git reset --hard"]));
    }

    #[test]
    fn git_status_is_not_dangerous() {
        assert!(!dangerous(&["git", "status"]));
    }

    #[test]
    fn bash_git_status_is_not_dangerous() {
        assert!(!dangerous(&["bash", "-lc", "git status"]));
    }

    #[test]
    fn sudo_git_reset_is_dangerous() {
        assert!(dangerous(&["sudo", "git", "reset", "--hard"]));
    }

    #[test]
    fn usr_bin_git_is_dangerous() {
        assert!(dangerous(&["/usr/bin/git", "reset", "--hard"]));
    }

    #[test]
    fn git_branch_delete_is_dangerous() {
        assert!(dangerous(&["git", "branch", "-d", "feature"]));
        assert!(dangerous(&["git", "branch", "-D", "feature"]));
        assert!(dangerous(&["bash", "-lc", "git branch --delete feature"]));
    }

    #[test]
    fn git_branch_delete_with_stacked_short_flags_is_dangerous() {
        for flags in ["-dv", "-vd", "-vD", "-Dvv"] {
            assert!(
                dangerous(&["git", "branch", flags, "feature"]),
                "expected dangerous: git branch {flags} feature"
            );
        }
    }

    #[test]
    fn git_branch_delete_with_global_options_is_dangerous() {
        assert!(dangerous(&["git", "-C", ".", "branch", "-d", "feature"]));
        assert!(dangerous(&[
            "git",
            "-c",
            "color.ui=false",
            "branch",
            "-D",
            "feature",
        ]));
        assert!(dangerous(&["bash", "-lc", "git -C . branch -d feature"]));
    }

    #[test]
    fn git_checkout_reset_is_not_dangerous() {
        // The first non-option token is "checkout", so later positional args
        // like branch names must not be treated as subcommands.
        assert!(!dangerous(&["git", "checkout", "reset"]));
    }

    #[test]
    fn git_push_force_is_dangerous() {
        assert!(dangerous(&["git", "push", "--force", "origin", "main"]));
        assert!(dangerous(&["git", "push", "-f", "origin", "main"]));
        assert!(dangerous(&[
            "git",
            "-C",
            ".",
            "push",
            "--force-with-lease",
            "origin",
            "main",
        ]));
    }

    #[test]
    fn git_push_plus_refspec_is_dangerous() {
        assert!(dangerous(&["git", "push", "origin", "+main"]));
        assert!(dangerous(&[
            "git",
            "push",
            "origin",
            "+refs/heads/main:refs/heads/main",
        ]));
    }

    #[test]
    fn git_push_delete_flag_is_dangerous() {
        assert!(dangerous(&["git", "push", "--delete", "origin", "feature"]));
        assert!(dangerous(&["git", "push", "-d", "origin", "feature"]));
    }

    #[test]
    fn git_push_delete_refspec_is_dangerous() {
        assert!(dangerous(&["git", "push", "origin", ":feature"]));
        assert!(dangerous(&["bash", "-lc", "git push origin :feature"]));
    }

    #[test]
    fn git_push_without_force_is_not_dangerous() {
        assert!(!dangerous(&["git", "push", "origin", "main"]));
    }

    #[test]
    fn git_clean_force_is_dangerous_even_when_f_is_not_first_flag() {
        for flag in ["-fdx", "-xdf", "--force"] {
            assert!(
                dangerous(&["git", "clean", flag]),
                "expected dangerous: git clean {flag}"
            );
        }
    }

    #[test]
    fn rm_rf_is_dangerous() {
        assert!(dangerous(&["rm", "-rf", "/"]));
    }

    #[test]
    fn rm_f_is_dangerous() {
        assert!(dangerous(&["rm", "-f", "/"]));
    }
}
|
||||
592
codex-rs/shell-command/src/command_safety/is_safe_command.rs
Normal file
592
codex-rs/shell-command/src/command_safety/is_safe_command.rs
Normal file
@@ -0,0 +1,592 @@
|
||||
use crate::bash::parse_shell_lc_plain_commands;
|
||||
// Find the first matching git subcommand, skipping known global options that
|
||||
// may appear before it (e.g., `-C`, `-c`, `--git-dir`).
|
||||
// Implemented in `is_dangerous_command` and shared here.
|
||||
use crate::command_safety::is_dangerous_command::find_git_subcommand;
|
||||
use crate::command_safety::windows_safe_commands::is_safe_command_windows;
|
||||
|
||||
pub fn is_known_safe_command(command: &[String]) -> bool {
|
||||
let command: Vec<String> = command
|
||||
.iter()
|
||||
.map(|s| {
|
||||
if s == "zsh" {
|
||||
"bash".to_string()
|
||||
} else {
|
||||
s.clone()
|
||||
}
|
||||
})
|
||||
.collect();
|
||||
|
||||
if is_safe_command_windows(&command) {
|
||||
return true;
|
||||
}
|
||||
|
||||
if is_safe_to_call_with_exec(&command) {
|
||||
return true;
|
||||
}
|
||||
|
||||
// Support `bash -lc "..."` where the script consists solely of one or
|
||||
// more "plain" commands (only bare words / quoted strings) combined with
|
||||
// a conservative allow‑list of shell operators that themselves do not
|
||||
// introduce side effects ( "&&", "||", ";", and "|" ). If every
|
||||
// individual command in the script is itself a known‑safe command, then
|
||||
// the composite expression is considered safe.
|
||||
if let Some(all_commands) = parse_shell_lc_plain_commands(&command)
|
||||
&& !all_commands.is_empty()
|
||||
&& all_commands
|
||||
.iter()
|
||||
.all(|cmd| is_safe_to_call_with_exec(cmd))
|
||||
{
|
||||
return true;
|
||||
}
|
||||
false
|
||||
}
|
||||
|
||||
fn is_safe_to_call_with_exec(command: &[String]) -> bool {
|
||||
let Some(cmd0) = command.first().map(String::as_str) else {
|
||||
return false;
|
||||
};
|
||||
|
||||
match std::path::Path::new(&cmd0)
|
||||
.file_name()
|
||||
.and_then(|osstr| osstr.to_str())
|
||||
{
|
||||
Some(cmd) if cfg!(target_os = "linux") && matches!(cmd, "numfmt" | "tac") => true,
|
||||
|
||||
#[rustfmt::skip]
|
||||
Some(
|
||||
"cat" |
|
||||
"cd" |
|
||||
"cut" |
|
||||
"echo" |
|
||||
"expr" |
|
||||
"false" |
|
||||
"grep" |
|
||||
"head" |
|
||||
"id" |
|
||||
"ls" |
|
||||
"nl" |
|
||||
"paste" |
|
||||
"pwd" |
|
||||
"rev" |
|
||||
"seq" |
|
||||
"stat" |
|
||||
"tail" |
|
||||
"tr" |
|
||||
"true" |
|
||||
"uname" |
|
||||
"uniq" |
|
||||
"wc" |
|
||||
"which" |
|
||||
"whoami") => {
|
||||
true
|
||||
},
|
||||
|
||||
Some("base64") => {
|
||||
const UNSAFE_BASE64_OPTIONS: &[&str] = &["-o", "--output"];
|
||||
|
||||
!command.iter().skip(1).any(|arg| {
|
||||
UNSAFE_BASE64_OPTIONS.contains(&arg.as_str())
|
||||
|| arg.starts_with("--output=")
|
||||
|| (arg.starts_with("-o") && arg != "-o")
|
||||
})
|
||||
}
|
||||
|
||||
Some("find") => {
|
||||
// Certain options to `find` can delete files, write to files, or
|
||||
// execute arbitrary commands, so we cannot auto-approve the
|
||||
// invocation of `find` in such cases.
|
||||
#[rustfmt::skip]
|
||||
const UNSAFE_FIND_OPTIONS: &[&str] = &[
|
||||
// Options that can execute arbitrary commands.
|
||||
"-exec", "-execdir", "-ok", "-okdir",
|
||||
// Option that deletes matching files.
|
||||
"-delete",
|
||||
// Options that write pathnames to a file.
|
||||
"-fls", "-fprint", "-fprint0", "-fprintf",
|
||||
];
|
||||
|
||||
!command
|
||||
.iter()
|
||||
.any(|arg| UNSAFE_FIND_OPTIONS.contains(&arg.as_str()))
|
||||
}
|
||||
|
||||
// Ripgrep
|
||||
Some("rg") => {
|
||||
const UNSAFE_RIPGREP_OPTIONS_WITH_ARGS: &[&str] = &[
|
||||
// Takes an arbitrary command that is executed for each match.
|
||||
"--pre",
|
||||
// Takes a command that can be used to obtain the local hostname.
|
||||
"--hostname-bin",
|
||||
];
|
||||
const UNSAFE_RIPGREP_OPTIONS_WITHOUT_ARGS: &[&str] = &[
|
||||
// Calls out to other decompression tools, so do not auto-approve
|
||||
// out of an abundance of caution.
|
||||
"--search-zip",
|
||||
"-z",
|
||||
];
|
||||
|
||||
!command.iter().any(|arg| {
|
||||
UNSAFE_RIPGREP_OPTIONS_WITHOUT_ARGS.contains(&arg.as_str())
|
||||
|| UNSAFE_RIPGREP_OPTIONS_WITH_ARGS
|
||||
.iter()
|
||||
.any(|&opt| arg == opt || arg.starts_with(&format!("{opt}=")))
|
||||
})
|
||||
}
|
||||
|
||||
// Git
|
||||
Some("git") => {
|
||||
// Global config overrides like `-c core.pager=...` can force git
|
||||
// to execute arbitrary external commands. With no sandboxing, we
|
||||
// should always prompt in those cases.
|
||||
if git_has_config_override_global_option(command) {
|
||||
return false;
|
||||
}
|
||||
|
||||
let Some((subcommand_idx, subcommand)) =
|
||||
find_git_subcommand(command, &["status", "log", "diff", "show", "branch"])
|
||||
else {
|
||||
return false;
|
||||
};
|
||||
|
||||
let subcommand_args = &command[subcommand_idx + 1..];
|
||||
|
||||
match subcommand {
|
||||
"status" | "log" | "diff" | "show" => {
|
||||
git_subcommand_args_are_read_only(subcommand_args)
|
||||
}
|
||||
"branch" => {
|
||||
git_subcommand_args_are_read_only(subcommand_args)
|
||||
&& git_branch_is_read_only(subcommand_args)
|
||||
}
|
||||
other => {
|
||||
debug_assert!(false, "unexpected git subcommand from matcher: {other}");
|
||||
false
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Special-case `sed -n {N|M,N}p`
|
||||
Some("sed")
|
||||
if {
|
||||
command.len() <= 4
|
||||
&& command.get(1).map(String::as_str) == Some("-n")
|
||||
&& is_valid_sed_n_arg(command.get(2).map(String::as_str))
|
||||
} =>
|
||||
{
|
||||
true
|
||||
}
|
||||
|
||||
// ── anything else ─────────────────────────────────────────────────
|
||||
_ => false,
|
||||
}
|
||||
}
|
||||
|
||||
// `git branch` is considered safe only when every argument is a known
// listing/query flag. Anything else (another flag or a positional argument)
// may create, rename, or delete a branch. With no arguments at all,
// `git branch` simply lists branches, which is also safe.
fn git_branch_is_read_only(branch_args: &[String]) -> bool {
    branch_args.iter().all(|arg| {
        matches!(
            arg.as_str(),
            "--list"
                | "-l"
                | "--show-current"
                | "-a"
                | "--all"
                | "-r"
                | "--remotes"
                | "-v"
                | "-vv"
                | "--verbose"
        ) || arg.starts_with("--format=")
    })
}
|
||||
|
||||
/// Returns `true` if any argument is a git config override: `-c` (bare or
/// with the value glued on, e.g. `-ccore.pager=cat`), `--config-env`, or
/// `--config-env=<...>`.
fn git_has_config_override_global_option(command: &[String]) -> bool {
    for arg in command {
        // Covers both the bare `-c` and every `-c<something>` spelling.
        let short_form = arg.starts_with("-c");
        let long_form = arg == "--config-env" || arg.starts_with("--config-env=");
        if short_form || long_form {
            return true;
        }
    }
    false
}
|
||||
|
||||
/// Returns `true` when none of `args` is a flag that can write to disk or
/// launch an external tool. Such flags should never be auto-approved on an
/// unsandboxed machine.
fn git_subcommand_args_are_read_only(args: &[String]) -> bool {
    args.iter().all(|arg| {
        let is_unsafe = matches!(
            arg.as_str(),
            "--output" | "--ext-diff" | "--textconv" | "--exec" | "--paginate"
        ) || arg.starts_with("--output=")
            || arg.starts_with("--exec=");
        !is_unsafe
    })
}
|
||||
|
||||
// (bash parsing helpers implemented in crate::bash)
|
||||
|
||||
/* ----------------------------------------------------------
|
||||
Example
|
||||
---------------------------------------------------------- */
|
||||
|
||||
/// Returns true if `arg` is a sed print address of the form `Np` or `M,Np`,
/// i.e. it matches /^(\d+,)?\d+p$/. A missing argument is never valid.
fn is_valid_sed_n_arg(arg: Option<&str>) -> bool {
    /// Non-empty run of ASCII digits.
    fn is_line_number(text: &str) -> bool {
        !text.is_empty() && text.bytes().all(|b| b.is_ascii_digit())
    }

    // The argument must exist and end in the `p` command.
    let Some(address) = arg.and_then(|raw| raw.strip_suffix('p')) else {
        return false;
    };

    match address.split_once(',') {
        // Range such as "1,5"; a second comma leaves it in `last`, which then
        // fails the digit check.
        Some((first, last)) => is_line_number(first) && is_line_number(last),
        // Single line such as "10".
        None => is_line_number(address),
    }
}
|
||||
|
||||
#[cfg(test)]
mod tests {
    use super::*;
    use std::string::ToString;

    /// Builds an owned argv from string literals.
    fn vec_str(args: &[&str]) -> Vec<String> {
        args.iter().map(ToString::to_string).collect()
    }

    /// Representative argv shapes that the exec allow-list accepts.
    #[test]
    fn known_safe_examples() {
        assert!(is_safe_to_call_with_exec(&vec_str(&["ls"])));
        assert!(is_safe_to_call_with_exec(&vec_str(&["git", "status"])));
        assert!(is_safe_to_call_with_exec(&vec_str(&["git", "branch"])));
        assert!(is_safe_to_call_with_exec(&vec_str(&[
            "git",
            "branch",
            "--show-current"
        ])));
        assert!(is_safe_to_call_with_exec(&vec_str(&["base64"])));
        assert!(is_safe_to_call_with_exec(&vec_str(&[
            "sed", "-n", "1,5p", "file.txt"
        ])));
        assert!(is_safe_to_call_with_exec(&vec_str(&[
            "nl",
            "-nrz",
            "Cargo.toml"
        ])));

        // Safe `find` command (no unsafe options).
        assert!(is_safe_to_call_with_exec(&vec_str(&[
            "find", ".", "-name", "file.txt"
        ])));

        // `numfmt` and `tac` are allow-listed on Linux only.
        if cfg!(target_os = "linux") {
            assert!(is_safe_to_call_with_exec(&vec_str(&["numfmt", "1000"])));
            assert!(is_safe_to_call_with_exec(&vec_str(&["tac", "Cargo.toml"])));
        } else {
            assert!(!is_safe_to_call_with_exec(&vec_str(&["numfmt", "1000"])));
            assert!(!is_safe_to_call_with_exec(&vec_str(&["tac", "Cargo.toml"])));
        }
    }

    /// Deleting or creating a branch must not be auto-approved.
    #[test]
    fn git_branch_mutating_flags_are_not_safe() {
        assert!(!is_known_safe_command(&vec_str(&[
            "git", "branch", "-d", "feature"
        ])));
        assert!(!is_known_safe_command(&vec_str(&[
            "git",
            "branch",
            "new-branch"
        ])));
    }

    /// A global option before the subcommand (`-C .`) must not bypass the
    /// branch rules, whether invoked directly or through `bash -lc`.
    #[test]
    fn git_branch_global_options_respect_safety_rules() {
        use pretty_assertions::assert_eq;

        assert_eq!(
            is_known_safe_command(&vec_str(&["git", "-C", ".", "branch", "--show-current"])),
            true
        );
        assert_eq!(
            is_known_safe_command(&vec_str(&["git", "-C", ".", "branch", "-d", "feature"])),
            false
        );
        assert_eq!(
            is_known_safe_command(&vec_str(&["bash", "-lc", "git -C . branch -d feature",])),
            false
        );
    }

    #[test]
    fn git_first_positional_is_the_subcommand() {
        // In git, the first non-option token is the subcommand. Later positional
        // args (like branch names) must not be treated as subcommands.
        assert!(!is_known_safe_command(&vec_str(&[
            "git", "checkout", "status",
        ])));
    }

    /// `--output` (both spellings) and `-c` config overrides are rejected.
    #[test]
    fn git_output_and_config_override_flags_are_not_safe() {
        assert!(!is_known_safe_command(&vec_str(&[
            "git",
            "log",
            "--output=/tmp/git-log-out-test",
            "-n",
            "1",
        ])));
        assert!(!is_known_safe_command(&vec_str(&[
            "git",
            "diff",
            "--output",
            "/tmp/git-diff-out-test",
        ])));
        assert!(!is_known_safe_command(&vec_str(&[
            "git",
            "show",
            "--output=/tmp/git-show-out-test",
            "HEAD",
        ])));
        assert!(!is_known_safe_command(&vec_str(&[
            "git",
            "-c",
            "core.pager=cat",
            "log",
            "-n",
            "1",
        ])));
        assert!(!is_known_safe_command(&vec_str(&[
            "git",
            "-ccore.pager=cat",
            "status",
        ])));
    }

    /// Programs that are not on the allow-list are never safe.
    #[test]
    fn cargo_check_is_not_safe() {
        assert!(!is_known_safe_command(&vec_str(&["cargo", "check"])));
    }

    /// `zsh -lc` is classified the same way as `bash -lc`.
    #[test]
    fn zsh_lc_safe_command_sequence() {
        assert!(is_known_safe_command(&vec_str(&["zsh", "-lc", "ls"])));
    }

    /// Unknown programs, unlisted git subcommands, malformed `sed` addresses
    /// and `find` with side-effecting options are all rejected.
    #[test]
    fn unknown_or_partial() {
        assert!(!is_safe_to_call_with_exec(&vec_str(&["foo"])));
        assert!(!is_safe_to_call_with_exec(&vec_str(&["git", "fetch"])));
        assert!(!is_safe_to_call_with_exec(&vec_str(&[
            "sed", "-n", "xp", "file.txt"
        ])));

        // Unsafe `find` commands.
        for args in [
            vec_str(&["find", ".", "-name", "file.txt", "-exec", "rm", "{}", ";"]),
            vec_str(&[
                "find", ".", "-name", "*.py", "-execdir", "python3", "{}", ";",
            ]),
            vec_str(&["find", ".", "-name", "file.txt", "-ok", "rm", "{}", ";"]),
            vec_str(&["find", ".", "-name", "*.py", "-okdir", "python3", "{}", ";"]),
            vec_str(&["find", ".", "-delete", "-name", "file.txt"]),
            vec_str(&["find", ".", "-fls", "/etc/passwd"]),
            vec_str(&["find", ".", "-fprint", "/etc/passwd"]),
            vec_str(&["find", ".", "-fprint0", "/etc/passwd"]),
            vec_str(&["find", ".", "-fprintf", "/root/suid.txt", "%#m %u %p\n"]),
        ] {
            assert!(
                !is_safe_to_call_with_exec(&args),
                "expected {args:?} to be unsafe"
            );
        }
    }

    /// Every spelling of base64's output-file option is rejected.
    #[test]
    fn base64_output_options_are_unsafe() {
        for args in [
            vec_str(&["base64", "-o", "out.bin"]),
            vec_str(&["base64", "--output", "out.bin"]),
            vec_str(&["base64", "--output=out.bin"]),
            vec_str(&["base64", "-ob64.txt"]),
        ] {
            assert!(
                !is_safe_to_call_with_exec(&args),
                "expected {args:?} to be considered unsafe due to output option"
            );
        }
    }

    #[test]
    fn ripgrep_rules() {
        // Safe ripgrep invocations – none of the unsafe flags are present.
        assert!(is_safe_to_call_with_exec(&vec_str(&[
            "rg",
            "Cargo.toml",
            "-n"
        ])));

        // Unsafe flags that do not take an argument (present verbatim).
        for args in [
            vec_str(&["rg", "--search-zip", "files"]),
            vec_str(&["rg", "-z", "files"]),
        ] {
            assert!(
                !is_safe_to_call_with_exec(&args),
                "expected {args:?} to be considered unsafe due to zip-search flag",
            );
        }

        // Unsafe flags that expect a value, provided in both split and = forms.
        for args in [
            vec_str(&["rg", "--pre", "pwned", "files"]),
            vec_str(&["rg", "--pre=pwned", "files"]),
            vec_str(&["rg", "--hostname-bin", "pwned", "files"]),
            vec_str(&["rg", "--hostname-bin=pwned", "files"]),
        ] {
            assert!(
                !is_safe_to_call_with_exec(&args),
                "expected {args:?} to be considered unsafe due to external-command flag",
            );
        }
    }

    /// A fully qualified `pwsh.exe` path running a read-only cmdlet is safe.
    #[test]
    fn windows_powershell_full_path_is_safe() {
        if !cfg!(windows) {
            // Windows only because on Linux path splitting doesn't handle `/` separators properly
            return;
        }

        assert!(is_known_safe_command(&vec_str(&[
            r"C:\Program Files\PowerShell\7\pwsh.exe",
            "-Command",
            "Get-Location",
        ])));
    }

    /// Single plain commands wrapped in `bash -lc` are accepted.
    #[test]
    fn bash_lc_safe_examples() {
        assert!(is_known_safe_command(&vec_str(&["bash", "-lc", "ls"])));
        assert!(is_known_safe_command(&vec_str(&["bash", "-lc", "ls -1"])));
        assert!(is_known_safe_command(&vec_str(&[
            "bash",
            "-lc",
            "git status"
        ])));
        assert!(is_known_safe_command(&vec_str(&[
            "bash",
            "-lc",
            "grep -R \"Cargo.toml\" -n"
        ])));
        assert!(is_known_safe_command(&vec_str(&[
            "bash",
            "-lc",
            "sed -n 1,5p file.txt"
        ])));
        assert!(is_known_safe_command(&vec_str(&[
            "bash",
            "-lc",
            "sed -n '1,5p' file.txt"
        ])));

        assert!(is_known_safe_command(&vec_str(&[
            "bash",
            "-lc",
            "find . -name file.txt"
        ])));
    }

    /// Safe commands joined by `||`, `&&`, `;` or `|` stay safe.
    #[test]
    fn bash_lc_safe_examples_with_operators() {
        assert!(is_known_safe_command(&vec_str(&[
            "bash",
            "-lc",
            "grep -R \"Cargo.toml\" -n || true"
        ])));
        assert!(is_known_safe_command(&vec_str(&[
            "bash",
            "-lc",
            "ls && pwd"
        ])));
        assert!(is_known_safe_command(&vec_str(&[
            "bash",
            "-lc",
            "echo 'hi' ; ls"
        ])));
        assert!(is_known_safe_command(&vec_str(&[
            "bash",
            "-lc",
            "ls | wc -l"
        ])));
    }

    /// Scripts that are malformed, contain an unsafe command, or use shell
    /// constructs outside the allow-list are rejected.
    #[test]
    fn bash_lc_unsafe_examples() {
        assert!(
            !is_known_safe_command(&vec_str(&["bash", "-lc", "git", "status"])),
            "Four arg version is not known to be safe."
        );
        assert!(
            !is_known_safe_command(&vec_str(&["bash", "-lc", "'git status'"])),
            "The extra quoting around 'git status' makes it a program named 'git status' and is therefore unsafe."
        );

        assert!(
            !is_known_safe_command(&vec_str(&["bash", "-lc", "find . -name file.txt -delete"])),
            "Unsafe find option should not be auto-approved."
        );

        // Disallowed because of unsafe command in sequence.
        assert!(
            !is_known_safe_command(&vec_str(&["bash", "-lc", "ls && rm -rf /"])),
            "Sequence containing unsafe command must be rejected"
        );

        // Disallowed because of parentheses / subshell.
        assert!(
            !is_known_safe_command(&vec_str(&["bash", "-lc", "(ls)"])),
            "Parentheses (subshell) are not provably safe with the current parser"
        );
        assert!(
            !is_known_safe_command(&vec_str(&["bash", "-lc", "ls || (pwd && echo hi)"])),
            "Nested parentheses are not provably safe with the current parser"
        );

        // Disallowed redirection.
        assert!(
            !is_known_safe_command(&vec_str(&["bash", "-lc", "ls > out.txt"])),
            "> redirection should be rejected"
        );
    }
}
|
||||
3
codex-rs/shell-command/src/command_safety/mod.rs
Normal file
3
codex-rs/shell-command/src/command_safety/mod.rs
Normal file
@@ -0,0 +1,3 @@
|
||||
pub mod is_dangerous_command;
|
||||
pub mod is_safe_command;
|
||||
pub mod windows_safe_commands;
|
||||
201
codex-rs/shell-command/src/command_safety/powershell_parser.ps1
Normal file
201
codex-rs/shell-command/src/command_safety/powershell_parser.ps1
Normal file
@@ -0,0 +1,201 @@
|
||||
# Treat every error as terminating so the try/catch blocks below see it.
$ErrorActionPreference = 'Stop'

# The script text to analyse is passed in through this environment variable.
# An empty or missing payload is reported as a parse failure.
$payload = $env:CODEX_POWERSHELL_PAYLOAD
if ([string]::IsNullOrEmpty($payload)) {
    Write-Output '{"status":"parse_failed"}'
    exit 0
}

# The payload is base64; the decoded bytes are read with Encoding.Unicode
# (UTF-16). Any decoding error is also reported as a parse failure.
try {
    $source =
        [System.Text.Encoding]::Unicode.GetString(
            [System.Convert]::FromBase64String($payload)
        )
} catch {
    Write-Output '{"status":"parse_failed"}'
    exit 0
}

# Out-parameters filled in by the parser.
$tokens = $null
$errors = $null

# Parse the source into an AST without executing it.
$ast = $null
try {
    $ast = [System.Management.Automation.Language.Parser]::ParseInput(
        $source,
        [ref]$tokens,
        [ref]$errors
    )
} catch {
    Write-Output '{"status":"parse_failed"}'
    exit 0
}

# Syntax errors are reported with their own status so the caller can tell
# them apart from a payload that could not be decoded or parsed at all.
if ($errors.Count -gt 0) {
    Write-Output '{"status":"parse_errors"}'
    exit 0
}
|
||||
|
||||
# Converts one command element AST node into an array of plain words.
# Returns $null for anything that is not a constant: expandable strings with
# nested expressions, parameters whose argument is not a constant, and every
# other AST node type.
function Convert-CommandElement {
    param($element)

    # Bare word or quoted string constant: use its value as-is.
    if ($element -is [System.Management.Automation.Language.StringConstantExpressionAst]) {
        return @($element.Value)
    }

    # Expandable string: accepted only when it contains no nested expressions.
    if ($element -is [System.Management.Automation.Language.ExpandableStringExpressionAst]) {
        if ($element.NestedExpressions.Count -gt 0) {
            return $null
        }
        return @($element.Value)
    }

    # Other constants are rendered through ToString().
    if ($element -is [System.Management.Automation.Language.ConstantExpressionAst]) {
        return @($element.Value.ToString())
    }

    # Parameters become "-Name", optionally followed by a constant argument
    # emitted as a second word.
    if ($element -is [System.Management.Automation.Language.CommandParameterAst]) {
        if ($element.Argument -eq $null) {
            return @('-' + $element.ParameterName)
        }

        if ($element.Argument -is [System.Management.Automation.Language.StringConstantExpressionAst]) {
            return @('-' + $element.ParameterName, $element.Argument.Value)
        }

        if ($element.Argument -is [System.Management.Automation.Language.ConstantExpressionAst]) {
            return @('-' + $element.ParameterName, $element.Argument.Value.ToString())
        }

        return $null
    }

    return $null
}
|
||||
|
||||
# Converts one pipeline element into its list of words, or returns $null when
# the element is not a plain command.
function Convert-PipelineElement {
    param($element)

    if ($element -is [System.Management.Automation.Language.CommandAst]) {
        # Any redirection makes the command unsupported.
        if ($element.Redirections.Count -gt 0) {
            return $null
        }

        # Reject commands that carry an invocation operator (any value other
        # than TokenKind Unknown).
        if (
            $element.InvocationOperator -ne $null -and
            $element.InvocationOperator -ne [System.Management.Automation.Language.TokenKind]::Unknown
        ) {
            return $null
        }

        # Every element must convert; a single failure rejects the command.
        $parts = @()
        foreach ($commandElement in $element.CommandElements) {
            $converted = Convert-CommandElement $commandElement
            if ($converted -eq $null) {
                return $null
            }
            $parts += $converted
        }
        return $parts
    }

    if ($element -is [System.Management.Automation.Language.CommandExpressionAst]) {
        if ($element.Redirections.Count -gt 0) {
            return $null
        }

        # A parenthesised expression is unwrapped only when it holds a
        # pipeline with exactly one element, which is then converted
        # recursively.
        if ($element.Expression -is [System.Management.Automation.Language.ParenExpressionAst]) {
            $innerPipeline = $element.Expression.Pipeline
            if ($innerPipeline -and $innerPipeline.PipelineElements.Count -eq 1) {
                return Convert-PipelineElement $innerPipeline.PipelineElements[0]
            }
        }

        return $null
    }

    return $null
}
|
||||
|
||||
# Appends the word list of every element of $pipeline to $commands.
# Returns $false if the pipeline is empty or any element cannot be converted
# (or converts to an empty list); otherwise returns $true.
function Add-CommandsFromPipelineAst {
    param($pipeline, $commands)

    if ($pipeline.PipelineElements.Count -eq 0) {
        return $false
    }

    foreach ($element in $pipeline.PipelineElements) {
        $words = Convert-PipelineElement $element
        if ($words -eq $null -or $words.Count -eq 0) {
            return $false
        }
        # Add() returns a value; assign it to $null so it does not become
        # part of this function's output.
        $null = $commands.Add($words)
    }

    return $true
}
|
||||
|
||||
# Collects commands from both sides of a pipeline chain: the left-hand side
# first (it may itself be a chain), then the right-hand pipeline.
# Returns $false as soon as either side is unsupported.
function Add-CommandsFromPipelineChain {
    param($chain, $commands)

    if (-not (Add-CommandsFromPipelineBase $chain.LhsPipelineChain $commands)) {
        return $false
    }

    if (-not (Add-CommandsFromPipelineAst $chain.RhsPipeline $commands)) {
        return $false
    }

    return $true
}
|
||||
|
||||
# Dispatches on the node type: plain pipelines and pipeline chains are
# handled by their dedicated collectors; any other node type is unsupported
# and yields $false.
function Add-CommandsFromPipelineBase {
    param($pipeline, $commands)

    if ($pipeline -is [System.Management.Automation.Language.PipelineAst]) {
        return Add-CommandsFromPipelineAst $pipeline $commands
    }

    if ($pipeline -is [System.Management.Automation.Language.PipelineChainAst]) {
        return Add-CommandsFromPipelineChain $pipeline $commands
    }

    return $false
}
|
||||
|
||||
# Walk every top-level statement of the end block and collect its commands.
# The first unsupported statement discards everything ($commands = $null).
$commands = [System.Collections.ArrayList]::new()

foreach ($statement in $ast.EndBlock.Statements) {
    if (-not (Add-CommandsFromPipelineBase $statement $commands)) {
        $commands = $null
        break
    }
}

# Normalise each collected command: both a lone string and an array or other
# enumerable are wrapped with @(...) before being stored. Any other entry
# type makes the whole result unsupported.
if ($commands -ne $null) {
    $normalized = [System.Collections.ArrayList]::new()
    foreach ($cmd in $commands) {
        if ($cmd -is [string]) {
            $null = $normalized.Add(@($cmd))
            continue
        }

        if ($cmd -is [System.Array] -or $cmd -is [System.Collections.IEnumerable]) {
            $null = $normalized.Add(@($cmd))
            continue
        }

        $normalized = $null
        break
    }

    $commands = $normalized
}

# Build the result: 'unsupported' when collection failed, otherwise 'ok'
# together with the list of commands.
$result = if ($commands -eq $null) {
    @{ status = 'unsupported' }
} else {
    @{ status = 'ok'; commands = $commands }
}

# Emit the result as JSON. The leading comma wraps $result in a one-element
# array before it enters the pipeline.
,$result | ConvertTo-Json -Depth 3
|
||||
@@ -0,0 +1,755 @@
|
||||
use std::path::Path;
|
||||
|
||||
use once_cell::sync::Lazy;
|
||||
use regex::Regex;
|
||||
use shlex::split as shlex_split;
|
||||
use url::Url;
|
||||
|
||||
pub fn is_dangerous_command_windows(command: &[String]) -> bool {
|
||||
// Prefer structured parsing for PowerShell/CMD so we can spot URL-bearing
|
||||
// invocations of ShellExecute-style entry points before falling back to
|
||||
// simple argv heuristics.
|
||||
if is_dangerous_powershell(command) {
|
||||
return true;
|
||||
}
|
||||
|
||||
if is_dangerous_cmd(command) {
|
||||
return true;
|
||||
}
|
||||
|
||||
is_direct_gui_launch(command)
|
||||
}
|
||||
|
||||
fn is_dangerous_powershell(command: &[String]) -> bool {
|
||||
let Some((exe, rest)) = command.split_first() else {
|
||||
return false;
|
||||
};
|
||||
if !is_powershell_executable(exe) {
|
||||
return false;
|
||||
}
|
||||
// Parse the PowerShell invocation to get a flat token list we can scan for
|
||||
// dangerous cmdlets/COM calls plus any URL-looking arguments. This is a
|
||||
// best-effort shlex split of the script text, not a full PS parser.
|
||||
let Some(parsed) = parse_powershell_invocation(rest) else {
|
||||
return false;
|
||||
};
|
||||
|
||||
let tokens_lc: Vec<String> = parsed
|
||||
.tokens
|
||||
.iter()
|
||||
.map(|t| t.trim_matches('\'').trim_matches('"').to_ascii_lowercase())
|
||||
.collect();
|
||||
let has_url = args_have_url(&parsed.tokens);
|
||||
|
||||
if has_url
|
||||
&& tokens_lc.iter().any(|t| {
|
||||
matches!(
|
||||
t.as_str(),
|
||||
"start-process" | "start" | "saps" | "invoke-item" | "ii"
|
||||
) || t.contains("start-process")
|
||||
|| t.contains("invoke-item")
|
||||
})
|
||||
{
|
||||
return true;
|
||||
}
|
||||
|
||||
if has_url
|
||||
&& tokens_lc
|
||||
.iter()
|
||||
.any(|t| t.contains("shellexecute") || t.contains("shell.application"))
|
||||
{
|
||||
return true;
|
||||
}
|
||||
|
||||
if let Some(first) = tokens_lc.first() {
|
||||
// Legacy ShellExecute path via url.dll
|
||||
if first == "rundll32"
|
||||
&& tokens_lc
|
||||
.iter()
|
||||
.any(|t| t.contains("url.dll,fileprotocolhandler"))
|
||||
&& has_url
|
||||
{
|
||||
return true;
|
||||
}
|
||||
if first == "mshta" && has_url {
|
||||
return true;
|
||||
}
|
||||
if is_browser_executable(first) && has_url {
|
||||
return true;
|
||||
}
|
||||
if matches!(first.as_str(), "explorer" | "explorer.exe") && has_url {
|
||||
return true;
|
||||
}
|
||||
}
|
||||
|
||||
// Check for force delete operations (e.g., Remove-Item -Force)
|
||||
if has_force_delete_cmdlet(&tokens_lc) {
|
||||
return true;
|
||||
}
|
||||
|
||||
false
|
||||
}
|
||||
|
||||
fn is_dangerous_cmd(command: &[String]) -> bool {
|
||||
let Some((exe, rest)) = command.split_first() else {
|
||||
return false;
|
||||
};
|
||||
let Some(base) = executable_basename(exe) else {
|
||||
return false;
|
||||
};
|
||||
if base != "cmd" && base != "cmd.exe" {
|
||||
return false;
|
||||
}
|
||||
|
||||
let mut iter = rest.iter();
|
||||
for arg in iter.by_ref() {
|
||||
let lower = arg.to_ascii_lowercase();
|
||||
match lower.as_str() {
|
||||
"/c" | "/r" | "-c" => break,
|
||||
_ if lower.starts_with('/') => continue,
|
||||
// Unknown tokens before the command body => bail.
|
||||
_ => return false,
|
||||
}
|
||||
}
|
||||
|
||||
let remaining: Vec<String> = iter.cloned().collect();
|
||||
if remaining.is_empty() {
|
||||
return false;
|
||||
}
|
||||
|
||||
let cmd_tokens: Vec<String> = match remaining.as_slice() {
|
||||
[only] => shlex_split(only).unwrap_or_else(|| vec![only.clone()]),
|
||||
_ => remaining,
|
||||
};
|
||||
|
||||
// Refine tokens by splitting concatenated CMD operators (e.g. "echo hi&del")
|
||||
let tokens: Vec<String> = cmd_tokens
|
||||
.into_iter()
|
||||
.flat_map(|t| split_embedded_cmd_operators(&t))
|
||||
.collect();
|
||||
|
||||
const CMD_SEPARATORS: &[&str] = &["&", "&&", "|", "||"];
|
||||
tokens
|
||||
.split(|t| CMD_SEPARATORS.contains(&t.as_str()))
|
||||
.any(|segment| {
|
||||
let Some(cmd) = segment.first() else {
|
||||
return false;
|
||||
};
|
||||
|
||||
// Classic `cmd /c ... start https://...` ShellExecute path.
|
||||
if cmd.eq_ignore_ascii_case("start") && args_have_url(segment) {
|
||||
return true;
|
||||
}
|
||||
// Force delete: del /f, erase /f
|
||||
if (cmd.eq_ignore_ascii_case("del") || cmd.eq_ignore_ascii_case("erase"))
|
||||
&& has_force_flag_cmd(segment)
|
||||
{
|
||||
return true;
|
||||
}
|
||||
// Recursive directory removal: rd /s /q, rmdir /s /q
|
||||
if (cmd.eq_ignore_ascii_case("rd") || cmd.eq_ignore_ascii_case("rmdir"))
|
||||
&& has_recursive_flag_cmd(segment)
|
||||
&& has_quiet_flag_cmd(segment)
|
||||
{
|
||||
return true;
|
||||
}
|
||||
false
|
||||
})
|
||||
}
|
||||
|
||||
fn is_direct_gui_launch(command: &[String]) -> bool {
|
||||
let Some((exe, rest)) = command.split_first() else {
|
||||
return false;
|
||||
};
|
||||
let Some(base) = executable_basename(exe) else {
|
||||
return false;
|
||||
};
|
||||
|
||||
// Explorer/rundll32/mshta or direct browser exe with a URL anywhere in args.
|
||||
if matches!(base.as_str(), "explorer" | "explorer.exe") && args_have_url(rest) {
|
||||
return true;
|
||||
}
|
||||
if matches!(base.as_str(), "mshta" | "mshta.exe") && args_have_url(rest) {
|
||||
return true;
|
||||
}
|
||||
if (base == "rundll32" || base == "rundll32.exe")
|
||||
&& rest.iter().any(|t| {
|
||||
t.to_ascii_lowercase()
|
||||
.contains("url.dll,fileprotocolhandler")
|
||||
})
|
||||
&& args_have_url(rest)
|
||||
{
|
||||
return true;
|
||||
}
|
||||
if is_browser_executable(&base) && args_have_url(rest) {
|
||||
return true;
|
||||
}
|
||||
|
||||
false
|
||||
}
|
||||
|
||||
/// Splits CMD operators that are glued to their neighbours, so `echo hi&del`
/// becomes `["echo hi", "&", "del"]`.
///
/// Recognises `&`, `&&`, `|` and `||`. Text between operators is kept
/// verbatim (not trimmed); pieces that are empty or whitespace-only are
/// dropped. Best-effort only: CMD escaping rules are not modelled.
fn split_embedded_cmd_operators(token: &str) -> Vec<String> {
    // `&` and `|` are ASCII, so their byte offsets are always valid `str`
    // slice boundaries and the scan can work on raw bytes.
    let bytes = token.as_bytes();
    let mut pieces: Vec<String> = Vec::new();
    let mut text_start = 0;
    let mut pos = 0;

    while pos < bytes.len() {
        let current = bytes[pos];
        if current != b'&' && current != b'|' {
            pos += 1;
            continue;
        }

        // Flush the text that precedes the operator.
        if pos > text_start {
            pieces.push(token[text_start..pos].to_string());
        }

        // A doubled character (`&&` / `||`) forms a single operator.
        let op_end = if bytes.get(pos + 1) == Some(&current) {
            pos + 2
        } else {
            pos + 1
        };
        pieces.push(token[pos..op_end].to_string());

        text_start = op_end;
        pos = op_end;
    }

    if text_start < token.len() {
        pieces.push(token[text_start..].to_string());
    }

    pieces.retain(|piece| !piece.trim().is_empty());
    pieces
}
|
||||
|
||||
/// Returns `true` when some command segment of `tokens` contains both a
/// delete cmdlet/alias and a `-Force` switch (`-force` or `-force:<value>`,
/// case-insensitive).
///
/// Segments are approximated by cutting tokens at hard separators; within a
/// segment, tokens are further split on punctuation that tends to stick to
/// words (braces, parentheses, brackets, commas, semicolons).
fn has_force_delete_cmdlet(tokens: &[String]) -> bool {
    const DELETE_CMDLETS: &[&str] = &["remove-item", "ri", "rm", "del", "erase", "rd", "rmdir"];

    // Hard separators that end a command segment (so -Force must be in same segment)
    const SEG_SEPS: &[char] = &[';', '|', '&', '\n', '\r', '\t'];

    // Soft separators: punctuation that can stick to tokens (blocks, parens, brackets, commas, etc.)
    const SOFT_SEPS: &[char] = &['{', '}', '(', ')', '[', ']', ',', ';'];

    // Pass 1: group the words into rough command segments.
    let mut segments: Vec<Vec<&str>> = Vec::new();
    let mut current: Vec<&str> = Vec::new();
    for token in tokens {
        let mut pieces = token.split(SEG_SEPS).peekable();
        while let Some(piece) = pieces.next() {
            let word = piece.trim();
            if !word.is_empty() {
                current.push(word);
            }
            // Another piece follows, so a hard separator sat right after
            // this one: close the segment unless it is still empty.
            if pieces.peek().is_some() && !current.is_empty() {
                segments.push(std::mem::take(&mut current));
            }
        }
    }
    segments.push(current);

    // Pass 2: inside each segment, break words on soft punctuation and look
    // for a delete cmdlet together with a force switch.
    segments.iter().any(|segment| {
        let mut saw_delete = false;
        let mut saw_force = false;

        let atoms = segment
            .iter()
            .flat_map(|word| word.split(SOFT_SEPS))
            .map(str::trim)
            .filter(|atom| !atom.is_empty());

        for atom in atoms {
            saw_delete |= DELETE_CMDLETS
                .iter()
                .any(|cmdlet| atom.eq_ignore_ascii_case(cmdlet));
            saw_force |= atom.eq_ignore_ascii_case("-force")
                || atom
                    .get(..7)
                    .is_some_and(|prefix| prefix.eq_ignore_ascii_case("-force:"));
        }

        saw_delete && saw_force
    })
}
|
||||
|
||||
/// Returns `true` if any argument is exactly the CMD force switch, `/f` or
/// `/F` (used with `del`/`erase`).
fn has_force_flag_cmd(args: &[String]) -> bool {
    args.iter()
        .map(String::as_str)
        .any(|arg| matches!(arg, "/f" | "/F"))
}
|
||||
|
||||
/// Returns `true` if any argument is exactly the CMD recursive switch, `/s`
/// or `/S` (used with `rd`/`rmdir`).
fn has_recursive_flag_cmd(args: &[String]) -> bool {
    args.iter()
        .map(String::as_str)
        .any(|arg| matches!(arg, "/s" | "/S"))
}
|
||||
|
||||
/// Returns `true` if any argument is exactly the CMD quiet switch, `/q` or
/// `/Q` (used with `rd`/`rmdir`).
fn has_quiet_flag_cmd(args: &[String]) -> bool {
    args.iter()
        .map(String::as_str)
        .any(|arg| matches!(arg, "/q" | "/Q"))
}
|
||||
|
||||
fn args_have_url(args: &[String]) -> bool {
|
||||
args.iter().any(|arg| looks_like_url(arg))
|
||||
}
|
||||
|
||||
fn looks_like_url(token: &str) -> bool {
|
||||
// Strip common PowerShell punctuation around inline URLs (quotes, parens, trailing semicolons).
|
||||
// Capture the middle token after trimming leading quotes/parens/whitespace and trailing semicolons/closing parens.
|
||||
static RE: Lazy<Option<Regex>> =
|
||||
Lazy::new(|| Regex::new(r#"^[ "'\(\s]*([^\s"'\);]+)[\s;\)]*$"#).ok());
|
||||
// If the token embeds a URL alongside other text (e.g., Start-Process('https://...'))
|
||||
// as a single shlex token, grab the substring starting at the first URL prefix.
|
||||
let urlish = token
|
||||
.find("https://")
|
||||
.or_else(|| token.find("http://"))
|
||||
.map(|idx| &token[idx..])
|
||||
.unwrap_or(token);
|
||||
|
||||
let candidate = RE
|
||||
.as_ref()
|
||||
.and_then(|re| re.captures(urlish))
|
||||
.and_then(|caps| caps.get(1))
|
||||
.map(|m| m.as_str())
|
||||
.unwrap_or(urlish);
|
||||
let Ok(url) = Url::parse(candidate) else {
|
||||
return false;
|
||||
};
|
||||
matches!(url.scheme(), "http" | "https")
|
||||
}
|
||||
|
||||
/// Extracts the lower-cased file name of an executable path.
///
/// Both `/` and `\` are honoured as directory separators regardless of the host
/// platform, so a Windows-style path such as `C:\Windows\System32\cmd.exe` yields
/// `cmd.exe` even when this code runs where `Path` does not split on `\`.
///
/// Returns `None` when the path has no file name component or the name is not
/// valid UTF-8.
fn executable_basename(exe: &str) -> Option<String> {
    let name = Path::new(exe).file_name()?.to_str()?;
    // Where `\` is already a separator this is a no-op; elsewhere it drops the
    // Windows directory prefix that `file_name` left in place.
    let name = name.rsplit('\\').find(|part| !part.is_empty())?;
    Some(name.to_ascii_lowercase())
}
|
||||
|
||||
fn is_powershell_executable(exe: &str) -> bool {
|
||||
matches!(
|
||||
executable_basename(exe).as_deref(),
|
||||
Some("powershell") | Some("powershell.exe") | Some("pwsh") | Some("pwsh.exe")
|
||||
)
|
||||
}
|
||||
|
||||
/// Returns `true` when `name` is one of the recognised browser executables.
///
/// The comparison is exact and case-sensitive; each browser is accepted with or
/// without a single `.exe` suffix.
fn is_browser_executable(name: &str) -> bool {
    const BROWSERS: [&str; 4] = ["chrome", "msedge", "firefox", "iexplore"];

    let stem = name.strip_suffix(".exe").unwrap_or(name);
    BROWSERS.contains(&stem)
}
|
||||
|
||||
/// Result of recognising a PowerShell command line.
struct ParsedPowershell {
    /// Words of the command to inspect: either the shlex-split script that followed
    /// `-Command`, or the remaining raw arguments when no `-Command` switch was given.
    tokens: Vec<String>,
}
|
||||
|
||||
fn parse_powershell_invocation(args: &[String]) -> Option<ParsedPowershell> {
|
||||
if args.is_empty() {
|
||||
return None;
|
||||
}
|
||||
|
||||
let mut idx = 0;
|
||||
while idx < args.len() {
|
||||
let arg = &args[idx];
|
||||
let lower = arg.to_ascii_lowercase();
|
||||
match lower.as_str() {
|
||||
"-command" | "/command" | "-c" => {
|
||||
let script = args.get(idx + 1)?;
|
||||
if idx + 2 != args.len() {
|
||||
return None;
|
||||
}
|
||||
let tokens = shlex_split(script)?;
|
||||
return Some(ParsedPowershell { tokens });
|
||||
}
|
||||
_ if lower.starts_with("-command:") || lower.starts_with("/command:") => {
|
||||
if idx + 1 != args.len() {
|
||||
return None;
|
||||
}
|
||||
let (_, script) = arg.split_once(':')?;
|
||||
let tokens = shlex_split(script)?;
|
||||
return Some(ParsedPowershell { tokens });
|
||||
}
|
||||
"-nologo" | "-noprofile" | "-noninteractive" | "-mta" | "-sta" => {
|
||||
idx += 1;
|
||||
}
|
||||
_ if lower.starts_with('-') => {
|
||||
idx += 1;
|
||||
}
|
||||
_ => {
|
||||
let rest = args[idx..].to_vec();
|
||||
return Some(ParsedPowershell { tokens: rest });
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
None
|
||||
}
|
||||
|
||||
#[cfg(test)]
mod tests {
    use super::is_dangerous_command_windows;

    /// Runs the detector on an argv given as string literals.
    fn flagged(argv: &[&str]) -> bool {
        let owned: Vec<String> = argv.iter().map(|word| (*word).to_string()).collect();
        is_dangerous_command_windows(&owned)
    }

    // URL launches

    #[test]
    fn powershell_start_process_url_is_dangerous() {
        assert!(flagged(&[
            "powershell",
            "-NoLogo",
            "-Command",
            "Start-Process 'https://example.com'",
        ]));
    }

    #[test]
    fn powershell_start_process_url_with_trailing_semicolon_is_dangerous() {
        assert!(flagged(&[
            "powershell",
            "-Command",
            "Start-Process('https://example.com');",
        ]));
    }

    #[test]
    fn powershell_start_process_local_is_not_flagged() {
        assert!(!flagged(&["powershell", "-Command", "Start-Process notepad.exe"]));
    }

    #[test]
    fn cmd_start_with_url_is_dangerous() {
        assert!(flagged(&["cmd", "/c", "start", "https://example.com"]));
    }

    #[test]
    fn msedge_with_url_is_dangerous() {
        assert!(flagged(&["msedge.exe", "https://example.com"]));
    }

    #[test]
    fn explorer_with_directory_is_not_flagged() {
        assert!(!flagged(&["explorer.exe", "."]));
    }

    // Force delete tests for PowerShell

    #[test]
    fn powershell_remove_item_force_is_dangerous() {
        assert!(flagged(&["powershell", "-Command", "Remove-Item test -Force"]));
    }

    #[test]
    fn powershell_remove_item_recurse_force_is_dangerous() {
        assert!(flagged(&[
            "powershell",
            "-Command",
            "Remove-Item test -Recurse -Force",
        ]));
    }

    #[test]
    fn powershell_ri_alias_force_is_dangerous() {
        assert!(flagged(&["pwsh", "-Command", "ri test -Force"]));
    }

    #[test]
    fn powershell_remove_item_without_force_is_not_flagged() {
        assert!(!flagged(&["powershell", "-Command", "Remove-Item test"]));
    }

    // Force delete tests for CMD

    #[test]
    fn cmd_del_force_is_dangerous() {
        assert!(flagged(&["cmd", "/c", "del", "/f", "test.txt"]));
    }

    #[test]
    fn cmd_erase_force_is_dangerous() {
        assert!(flagged(&["cmd", "/c", "erase", "/f", "test.txt"]));
    }

    #[test]
    fn cmd_del_without_force_is_not_flagged() {
        assert!(!flagged(&["cmd", "/c", "del", "test.txt"]));
    }

    #[test]
    fn cmd_rd_recursive_is_dangerous() {
        assert!(flagged(&["cmd", "/c", "rd", "/s", "/q", "test"]));
    }

    #[test]
    fn cmd_rd_without_quiet_is_not_flagged() {
        assert!(!flagged(&["cmd", "/c", "rd", "/s", "test"]));
    }

    #[test]
    fn cmd_rmdir_recursive_is_dangerous() {
        assert!(flagged(&["cmd", "/c", "rmdir", "/s", "/q", "test"]));
    }

    // Test exact scenario from issue #8567
    #[test]
    fn powershell_remove_item_path_recurse_force_is_dangerous() {
        assert!(flagged(&[
            "powershell",
            "-Command",
            "Remove-Item -Path 'test' -Recurse -Force",
        ]));
    }

    #[test]
    fn powershell_remove_item_force_with_semicolon_is_dangerous() {
        assert!(flagged(&[
            "powershell",
            "-Command",
            "Remove-Item test -Force; Write-Host done",
        ]));
    }

    #[test]
    fn powershell_remove_item_force_inside_block_is_dangerous() {
        assert!(flagged(&[
            "powershell",
            "-Command",
            "if ($true) { Remove-Item test -Force}",
        ]));
    }

    #[test]
    fn powershell_remove_item_force_inside_brackets_is_dangerous() {
        assert!(flagged(&[
            "powershell",
            "-Command",
            "[void]( Remove-Item test -Force)]",
        ]));
    }

    #[test]
    fn cmd_del_path_containing_f_is_not_flagged() {
        assert!(!flagged(&["cmd", "/c", "del", "C:/foo/bar.txt"]));
    }

    #[test]
    fn cmd_rd_path_containing_s_is_not_flagged() {
        assert!(!flagged(&["cmd", "/c", "rd", "C:/source"]));
    }

    #[test]
    fn cmd_bypass_chained_del_is_dangerous() {
        assert!(flagged(&[
            "cmd", "/c", "echo", "hello", "&", "del", "/f", "file.txt",
        ]));
    }

    #[test]
    fn powershell_chained_no_space_is_dangerous() {
        assert!(flagged(&[
            "powershell",
            "-Command",
            "Write-Host hi;Remove-Item -Force C:\\tmp",
        ]));
    }

    #[test]
    fn powershell_comma_separated_is_dangerous() {
        assert!(flagged(&["powershell", "-Command", "del,-Force,C:\\foo"]));
    }

    #[test]
    fn cmd_echo_del_is_not_dangerous() {
        assert!(!flagged(&["cmd", "/c", "echo", "del", "/f"]));
    }

    #[test]
    fn cmd_del_single_string_argument_is_dangerous() {
        assert!(flagged(&["cmd", "/c", "del /f file.txt"]));
    }

    #[test]
    fn cmd_del_chained_single_string_argument_is_dangerous() {
        assert!(flagged(&["cmd", "/c", "echo hello & del /f file.txt"]));
    }

    #[test]
    fn cmd_chained_no_space_del_is_dangerous() {
        assert!(flagged(&["cmd", "/c", "echo hi&del /f file.txt"]));
    }

    #[test]
    fn cmd_chained_andand_no_space_del_is_dangerous() {
        assert!(flagged(&["cmd", "/c", "echo hi&&del /f file.txt"]));
    }

    #[test]
    fn cmd_chained_oror_no_space_del_is_dangerous() {
        assert!(flagged(&["cmd", "/c", "echo hi||del /f file.txt"]));
    }

    #[test]
    fn cmd_start_url_single_string_is_dangerous() {
        assert!(flagged(&["cmd", "/c", "start https://example.com"]));
    }

    #[test]
    fn cmd_chained_no_space_rmdir_is_dangerous() {
        assert!(flagged(&["cmd", "/c", "echo hi&rmdir /s /q testdir"]));
    }

    #[test]
    fn cmd_del_force_uppercase_flag_is_dangerous() {
        assert!(flagged(&["cmd", "/c", "DEL", "/F", "file.txt"]));
    }

    #[test]
    fn cmdexe_r_del_force_is_dangerous() {
        assert!(flagged(&["cmd.exe", "/r", "del", "/f", "file.txt"]));
    }

    #[test]
    fn cmd_start_quoted_url_single_string_is_dangerous() {
        assert!(flagged(&["cmd", "/c", r#"start "https://example.com""#]));
    }

    #[test]
    fn cmd_start_title_then_url_is_dangerous() {
        assert!(flagged(&["cmd", "/c", r#"start "" https://example.com"#]));
    }

    #[test]
    fn powershell_rm_alias_force_is_dangerous() {
        assert!(flagged(&["powershell", "-Command", "rm test -Force"]));
    }

    #[test]
    fn powershell_benign_force_separate_command_is_not_dangerous() {
        assert!(!flagged(&[
            "powershell",
            "-Command",
            "Get-ChildItem -Force; Remove-Item test",
        ]));
    }
}
|
||||
@@ -0,0 +1,623 @@
|
||||
use base64::Engine;
|
||||
use base64::engine::general_purpose::STANDARD as BASE64_STANDARD;
|
||||
use serde::Deserialize;
|
||||
use std::path::Path;
|
||||
use std::process::Command;
|
||||
use std::sync::LazyLock;
|
||||
|
||||
const POWERSHELL_PARSER_SCRIPT: &str = include_str!("powershell_parser.ps1");
|
||||
|
||||
/// On Windows, we conservatively allow only clearly read-only PowerShell invocations
|
||||
/// that match a small safelist. Anything else (including direct CMD commands) is unsafe.
|
||||
pub fn is_safe_command_windows(command: &[String]) -> bool {
|
||||
if let Some(commands) = try_parse_powershell_command_sequence(command) {
|
||||
commands
|
||||
.iter()
|
||||
.all(|cmd| is_safe_powershell_command(cmd.as_slice()))
|
||||
} else {
|
||||
// Only PowerShell invocations are allowed on Windows for now; anything else is unsafe.
|
||||
false
|
||||
}
|
||||
}
|
||||
|
||||
/// Returns each command sequence if the invocation starts with a PowerShell binary.
|
||||
/// For example, the tokens from `pwsh Get-ChildItem | Measure-Object` become two sequences.
|
||||
fn try_parse_powershell_command_sequence(command: &[String]) -> Option<Vec<Vec<String>>> {
|
||||
let (exe, rest) = command.split_first()?;
|
||||
if is_powershell_executable(exe) {
|
||||
parse_powershell_invocation(exe, rest)
|
||||
} else {
|
||||
None
|
||||
}
|
||||
}
|
||||
|
||||
/// Parses a PowerShell invocation into discrete command vectors, rejecting unsafe patterns.
|
||||
fn parse_powershell_invocation(executable: &str, args: &[String]) -> Option<Vec<Vec<String>>> {
|
||||
if args.is_empty() {
|
||||
// Examples rejected here: "pwsh" and "powershell.exe" with no additional arguments.
|
||||
return None;
|
||||
}
|
||||
|
||||
let mut idx = 0;
|
||||
while idx < args.len() {
|
||||
let arg = &args[idx];
|
||||
let lower = arg.to_ascii_lowercase();
|
||||
match lower.as_str() {
|
||||
"-command" | "/command" | "-c" => {
|
||||
let script = args.get(idx + 1)?;
|
||||
if idx + 2 != args.len() {
|
||||
// Reject if there is more than one token representing the actual command.
|
||||
// Examples rejected here: "pwsh -Command foo bar" and "powershell -c ls extra".
|
||||
return None;
|
||||
}
|
||||
return parse_powershell_script(executable, script);
|
||||
}
|
||||
_ if lower.starts_with("-command:") || lower.starts_with("/command:") => {
|
||||
if idx + 1 != args.len() {
|
||||
// Reject if there are more tokens after the command itself.
|
||||
// Examples rejected here: "pwsh -Command:dir C:\\" and "powershell /Command:dir C:\\" with trailing args.
|
||||
return None;
|
||||
}
|
||||
let script = arg.split_once(':')?.1;
|
||||
return parse_powershell_script(executable, script);
|
||||
}
|
||||
|
||||
// Benign, no-arg flags we tolerate.
|
||||
"-nologo" | "-noprofile" | "-noninteractive" | "-mta" | "-sta" => {
|
||||
idx += 1;
|
||||
continue;
|
||||
}
|
||||
|
||||
// Explicitly forbidden/opaque or unnecessary for read-only operations.
|
||||
"-encodedcommand" | "-ec" | "-file" | "/file" | "-windowstyle" | "-executionpolicy"
|
||||
| "-workingdirectory" => {
|
||||
// Examples rejected here: "pwsh -EncodedCommand ..." and "powershell -File script.ps1".
|
||||
return None;
|
||||
}
|
||||
|
||||
// Unknown switch → bail conservatively.
|
||||
_ if lower.starts_with('-') => {
|
||||
// Examples rejected here: "pwsh -UnknownFlag" and "powershell -foo bar".
|
||||
return None;
|
||||
}
|
||||
|
||||
// If we hit non-flag tokens, treat the remainder as a command sequence.
|
||||
// This happens if powershell is invoked without -Command, e.g.
|
||||
// ["pwsh", "-NoLogo", "git", "-c", "core.pager=cat", "status"]
|
||||
_ => {
|
||||
let script = join_arguments_as_script(&args[idx..]);
|
||||
return parse_powershell_script(executable, &script);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Examples rejected here: "pwsh" and "powershell.exe -NoLogo" without a script.
|
||||
None
|
||||
}
|
||||
|
||||
/// Tokenizes an inline PowerShell script and delegates to the command splitter.
|
||||
/// Examples of when this is called: pwsh.exe -Command '<script>' or pwsh.exe -Command:<script>
|
||||
fn parse_powershell_script(executable: &str, script: &str) -> Option<Vec<Vec<String>>> {
|
||||
if let PowershellParseOutcome::Commands(commands) =
|
||||
parse_with_powershell_ast(executable, script)
|
||||
{
|
||||
Some(commands)
|
||||
} else {
|
||||
None
|
||||
}
|
||||
}
|
||||
|
||||
/// Returns `true` when `exe` names one of the supported PowerShell binaries.
///
/// Only the final path component is compared, ignoring ASCII case. When no UTF-8 file
/// name can be extracted, the whole input string is compared instead.
fn is_powershell_executable(exe: &str) -> bool {
    const POWERSHELL_NAMES: [&str; 4] = ["powershell", "powershell.exe", "pwsh", "pwsh.exe"];

    let base = match Path::new(exe).file_name().and_then(|name| name.to_str()) {
        Some(name) => name,
        None => exe,
    };
    POWERSHELL_NAMES
        .iter()
        .any(|known| base.eq_ignore_ascii_case(known))
}
|
||||
|
||||
/// Attempts to parse PowerShell using the real PowerShell parser, returning every pipeline element
|
||||
/// as a flat argv vector when possible. If parsing fails or the AST includes unsupported constructs,
|
||||
/// we conservatively reject the command instead of trying to split it manually.
|
||||
fn parse_with_powershell_ast(executable: &str, script: &str) -> PowershellParseOutcome {
|
||||
let encoded_script = encode_powershell_base64(script);
|
||||
let encoded_parser_script = encoded_parser_script();
|
||||
match Command::new(executable)
|
||||
.args([
|
||||
"-NoLogo",
|
||||
"-NoProfile",
|
||||
"-NonInteractive",
|
||||
"-EncodedCommand",
|
||||
encoded_parser_script,
|
||||
])
|
||||
.env("CODEX_POWERSHELL_PAYLOAD", &encoded_script)
|
||||
.output()
|
||||
{
|
||||
Ok(output) if output.status.success() => {
|
||||
if let Ok(result) =
|
||||
serde_json::from_slice::<PowershellParserOutput>(output.stdout.as_slice())
|
||||
{
|
||||
result.into_outcome()
|
||||
} else {
|
||||
PowershellParseOutcome::Failed
|
||||
}
|
||||
}
|
||||
_ => PowershellParseOutcome::Failed,
|
||||
}
|
||||
}
|
||||
|
||||
fn encode_powershell_base64(script: &str) -> String {
|
||||
let mut utf16 = Vec::with_capacity(script.len() * 2);
|
||||
for unit in script.encode_utf16() {
|
||||
utf16.extend_from_slice(&unit.to_le_bytes());
|
||||
}
|
||||
BASE64_STANDARD.encode(utf16)
|
||||
}
|
||||
|
||||
fn encoded_parser_script() -> &'static str {
|
||||
static ENCODED: LazyLock<String> =
|
||||
LazyLock::new(|| encode_powershell_base64(POWERSHELL_PARSER_SCRIPT));
|
||||
&ENCODED
|
||||
}
|
||||
|
||||
/// JSON document deserialized from the parser script's standard output.
/// Unknown fields cause deserialization to fail.
#[derive(Deserialize)]
#[serde(deny_unknown_fields)]
struct PowershellParserOutput {
    /// Outcome tag; `into_outcome` distinguishes `"ok"`, `"unsupported"`, and anything else.
    status: String,
    /// Parsed commands, one word list per command; may be absent.
    commands: Option<Vec<Vec<String>>>,
}
|
||||
|
||||
impl PowershellParserOutput {
|
||||
fn into_outcome(self) -> PowershellParseOutcome {
|
||||
match self.status.as_str() {
|
||||
"ok" => self
|
||||
.commands
|
||||
.filter(|commands| {
|
||||
!commands.is_empty()
|
||||
&& commands
|
||||
.iter()
|
||||
.all(|cmd| !cmd.is_empty() && cmd.iter().all(|word| !word.is_empty()))
|
||||
})
|
||||
.map(PowershellParseOutcome::Commands)
|
||||
.unwrap_or(PowershellParseOutcome::Unsupported),
|
||||
"unsupported" => PowershellParseOutcome::Unsupported,
|
||||
_ => PowershellParseOutcome::Failed,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Result of asking PowerShell to parse a script.
enum PowershellParseOutcome {
    /// The script parsed into one or more commands, each a non-empty list of words.
    Commands(Vec<Vec<String>>),
    /// The parser reported the script as unsupported, or returned unusable commands.
    Unsupported,
    /// The parser could not be run, exited unsuccessfully, or produced unreadable output.
    Failed,
}
|
||||
|
||||
fn join_arguments_as_script(args: &[String]) -> String {
|
||||
let mut words = Vec::with_capacity(args.len());
|
||||
if let Some((first, rest)) = args.split_first() {
|
||||
words.push(first.clone());
|
||||
for arg in rest {
|
||||
words.push(quote_argument(arg));
|
||||
}
|
||||
}
|
||||
words.join(" ")
|
||||
}
|
||||
|
||||
/// Renders one argument for inclusion in a PowerShell script string.
///
/// * An empty argument becomes `''`.
/// * An argument without whitespace is returned unchanged.
/// * Anything else is wrapped in single quotes, with embedded single quotes doubled.
fn quote_argument(arg: &str) -> String {
    match arg {
        "" => String::from("''"),
        plain if !plain.contains(char::is_whitespace) => plain.to_owned(),
        spaced => {
            let escaped = spaced.replace('\'', "''");
            format!("'{escaped}'")
        }
    }
}
|
||||
|
||||
/// Validates that a parsed PowerShell command stays within our read-only safelist.
|
||||
/// Everything before this is parsing, and rejecting things that make us feel uncomfortable.
|
||||
fn is_safe_powershell_command(words: &[String]) -> bool {
|
||||
if words.is_empty() {
|
||||
// Examples rejected here: "pwsh -Command ''" and "pwsh -Command \"\"".
|
||||
return false;
|
||||
}
|
||||
|
||||
// Reject nested unsafe cmdlets inside parentheses or arguments
|
||||
for w in words.iter() {
|
||||
let inner = w
|
||||
.trim_matches(|c| c == '(' || c == ')')
|
||||
.trim_start_matches('-')
|
||||
.to_ascii_lowercase();
|
||||
if matches!(
|
||||
inner.as_str(),
|
||||
"set-content"
|
||||
| "add-content"
|
||||
| "out-file"
|
||||
| "new-item"
|
||||
| "remove-item"
|
||||
| "move-item"
|
||||
| "copy-item"
|
||||
| "rename-item"
|
||||
| "start-process"
|
||||
| "stop-process"
|
||||
) {
|
||||
// Examples rejected here: "Write-Output (Set-Content foo6.txt 'abc')" and "Get-Content (New-Item bar.txt)".
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
let command = words[0]
|
||||
.trim_matches(|c| c == '(' || c == ')')
|
||||
.trim_start_matches('-')
|
||||
.to_ascii_lowercase();
|
||||
match command.as_str() {
|
||||
"echo" | "write-output" | "write-host" => true, // (no redirection allowed)
|
||||
"dir" | "ls" | "get-childitem" | "gci" => true,
|
||||
"cat" | "type" | "gc" | "get-content" => true,
|
||||
"select-string" | "sls" | "findstr" => true,
|
||||
"measure-object" | "measure" => true,
|
||||
"get-location" | "gl" | "pwd" => true,
|
||||
"test-path" | "tp" => true,
|
||||
"resolve-path" | "rvpa" => true,
|
||||
"select-object" | "select" => true,
|
||||
"get-item" => true,
|
||||
|
||||
"git" => is_safe_git_command(words),
|
||||
|
||||
"rg" => is_safe_ripgrep(words),
|
||||
|
||||
// Extra safety: explicitly prohibit common side-effecting cmdlets regardless of args.
|
||||
"set-content" | "add-content" | "out-file" | "new-item" | "remove-item" | "move-item"
|
||||
| "copy-item" | "rename-item" | "start-process" | "stop-process" => {
|
||||
// Examples rejected here: "pwsh -Command 'Set-Content notes.txt data'" and "pwsh -Command 'Remove-Item temp.log'".
|
||||
false
|
||||
}
|
||||
|
||||
_ => {
|
||||
// Examples rejected here: "pwsh -Command 'Invoke-WebRequest https://example.com'" and "pwsh -Command 'Start-Service Spooler'".
|
||||
false
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Checks that an `rg` invocation avoids options that can spawn arbitrary executables.
///
/// `words[0]` is the command name and is not inspected. An argument, compared
/// ASCII-case-insensitively, makes the invocation unsafe when it is:
///
/// * `--search-zip` or `-z`;
/// * `--pre` or `--hostname-bin`, alone or in `--opt=value` form;
/// * a bundle of short flags (a single leading dash followed by two or more characters)
///   that contains `z`, because `-iz` enables `--search-zip` just like a standalone `-z`.
///   This is deliberately conservative: a bundle whose `z` belongs to a flag value
///   (e.g. `-tzig`) is also rejected.
fn is_safe_ripgrep(words: &[String]) -> bool {
    const UNSAFE_RIPGREP_OPTIONS_WITH_ARGS: &[&str] = &["--pre", "--hostname-bin"];
    const UNSAFE_RIPGREP_OPTIONS_WITHOUT_ARGS: &[&str] = &["--search-zip", "-z"];

    !words.iter().skip(1).any(|arg| {
        let arg_lc = arg.to_ascii_lowercase();
        let arg_lc = arg_lc.as_str();

        // Examples rejected here: "pwsh -Command 'rg --pre cat pattern'" and
        // "pwsh -Command 'rg --search-zip pattern'".
        if UNSAFE_RIPGREP_OPTIONS_WITHOUT_ARGS.contains(&arg_lc) {
            return true;
        }
        let is_unsafe_option_with_arg = UNSAFE_RIPGREP_OPTIONS_WITH_ARGS.iter().any(|opt| {
            arg_lc
                .strip_prefix(*opt)
                .is_some_and(|rest| rest.is_empty() || rest.starts_with('='))
        });
        if is_unsafe_option_with_arg {
            return true;
        }

        // Example rejected here: "pwsh -Command 'rg -iz pattern'".
        arg_lc
            .strip_prefix('-')
            .filter(|flags| flags.len() > 1 && !flags.starts_with('-'))
            .is_some_and(|flags| flags.contains('z'))
    })
}
|
||||
|
||||
/// Ensures a Git command sticks to whitelisted read-only subcommands.
///
/// `words[0]` is the command name and is skipped. Leading arguments that start with `-`
/// are stepped over; `-c`, `--config`, `--git-dir` and `--work-tree` (compared ignoring
/// ASCII case) additionally consume the following argument and make the command unsafe
/// when that argument is missing. The first argument that does not start with `-` is
/// the subcommand and decides the result; later arguments are not inspected.
fn is_safe_git_command(words: &[String]) -> bool {
    const READ_ONLY_SUBCOMMANDS: [&str; 5] = ["status", "log", "show", "diff", "cat-file"];
    const OPTIONS_WITH_SEPARATE_VALUE: [&str; 4] = ["-c", "--config", "--git-dir", "--work-tree"];

    let mut remaining = words.iter().skip(1);
    loop {
        // Examples rejected here: "pwsh -Command 'git'" (no subcommand at all).
        let Some(word) = remaining.next() else {
            return false;
        };

        if !word.starts_with('-') {
            let subcommand = word.to_ascii_lowercase();
            return READ_ONLY_SUBCOMMANDS.contains(&subcommand.as_str());
        }

        let expects_value = OPTIONS_WITH_SEPARATE_VALUE
            .iter()
            .any(|option| word.eq_ignore_ascii_case(option));
        // Examples rejected here: "pwsh -Command 'git -c'" and "pwsh -Command 'git --git-dir'".
        if expects_value && remaining.next().is_none() {
            return false;
        }
        // Any other option, including the `--opt=value` forms, is stepped over.
    }
}
|
||||
|
||||
#[cfg(all(test, windows))]
mod tests {
    use super::*;
    use crate::powershell::try_find_pwsh_executable_blocking;
    use std::string::ToString;

    /// Runs the safety check on an argv given as string slices.
    fn is_safe(argv: &[&str]) -> bool {
        let owned: Vec<String> = argv.iter().map(ToString::to_string).collect();
        is_safe_command_windows(&owned)
    }

    /// Path of the `pwsh` executable as a `String`, when one can be found.
    fn pwsh_path() -> Option<String> {
        try_find_pwsh_executable_blocking().map(|pwsh| pwsh.as_path().to_str().unwrap().to_owned())
    }

    #[test]
    fn recognizes_safe_powershell_wrappers() {
        assert!(is_safe(&[
            "powershell.exe",
            "-NoLogo",
            "-Command",
            "Get-ChildItem -Path .",
        ]));
        assert!(is_safe(&[
            "powershell.exe",
            "-NoProfile",
            "-Command",
            "git status",
        ]));
        assert!(is_safe(&["powershell.exe", "Get-Content", "Cargo.toml"]));

        // pwsh parity
        if let Some(pwsh) = pwsh_path() {
            assert!(is_safe(&[
                pwsh.as_str(),
                "-NoProfile",
                "-Command",
                "Get-ChildItem",
            ]));
        }
    }

    #[test]
    fn accepts_full_path_powershell_invocations() {
        if !cfg!(windows) {
            // Windows only: elsewhere `Path` does not split on `\`, so the full path
            // below would not be reduced to its file name.
            return;
        }

        if let Some(pwsh) = pwsh_path() {
            assert!(is_safe(&[
                pwsh.as_str(),
                "-NoProfile",
                "-Command",
                "Get-ChildItem -Path .",
            ]));
        }

        assert!(is_safe(&[
            r"C:\Windows\System32\WindowsPowerShell\v1.0\powershell.exe",
            "-Command",
            "Get-Content Cargo.toml",
        ]));
    }

    #[test]
    fn allows_read_only_pipelines_and_git_usage() {
        let Some(pwsh) = pwsh_path() else {
            return;
        };
        let pwsh = pwsh.as_str();

        assert!(is_safe(&[
            pwsh,
            "-NoLogo",
            "-NoProfile",
            "-Command",
            "rg --files-with-matches foo | Measure-Object | Select-Object -ExpandProperty Count",
        ]));
        assert!(is_safe(&[
            pwsh,
            "-NoLogo",
            "-NoProfile",
            "-Command",
            "Get-Content foo.rs | Select-Object -Skip 200",
        ]));
        assert!(is_safe(&[
            pwsh,
            "-NoLogo",
            "-NoProfile",
            "-Command",
            "git -c core.pager=cat show HEAD:foo.rs",
        ]));
        assert!(is_safe(&[pwsh, "-Command", "-git cat-file -p HEAD:foo.rs"]));
        assert!(is_safe(&[pwsh, "-Command", "(Get-Content foo.rs -Raw)"]));
        assert!(is_safe(&[
            pwsh,
            "-Command",
            "Get-Item foo.rs | Select-Object Length",
        ]));
    }

    #[test]
    fn rejects_powershell_commands_with_side_effects() {
        assert!(!is_safe(&[
            "powershell.exe",
            "-NoLogo",
            "-Command",
            "Remove-Item foo.txt",
        ]));
        assert!(!is_safe(&[
            "powershell.exe",
            "-NoProfile",
            "-Command",
            "rg --pre cat",
        ]));

        let rejected_scripts = [
            "Set-Content foo.txt 'hello'",
            // Redirections are blocked
            "echo hi > out.txt",
            "Get-Content x | Out-File y",
            "Write-Output foo 2> err.txt",
            // Call operator is blocked
            "& Remove-Item foo",
            // Chained safe + unsafe must fail
            "Get-ChildItem; Remove-Item foo",
            // Nested unsafe cmdlet inside safe command must fail
            "Write-Output (Set-Content foo6.txt 'abc')",
            // Additional nested unsafe cmdlet examples must fail
            "Write-Host (Remove-Item foo.txt)",
            "Get-Content (New-Item bar.txt)",
            // Unsafe @ expansion.
            "ls @(calc.exe)",
            // Unsupported constructs that the AST parser refuses (no fallback to manual splitting).
            "ls && pwd",
            // Sub-expressions are rejected even if they contain otherwise safe commands.
            "Write-Output $(Get-Content foo)",
            // Empty words from the parser (e.g. '') are rejected.
            "''",
        ];
        for script in rejected_scripts {
            assert!(!is_safe(&["powershell.exe", "-Command", script]));
        }
    }

    #[test]
    fn accepts_constant_expression_arguments() {
        for script in ["Get-Content 'foo bar'", "Get-Content \"foo bar\""] {
            assert!(is_safe(&["powershell.exe", "-Command", script]));
        }
    }

    #[test]
    fn rejects_dynamic_arguments() {
        for script in ["Get-Content $foo", "Write-Output \"foo $bar\""] {
            assert!(!is_safe(&["powershell.exe", "-Command", script]));
        }
    }

    #[test]
    fn uses_invoked_powershell_variant_for_parsing() {
        if !cfg!(windows) {
            return;
        }

        let chain = "pwd && ls";
        assert!(
            !is_safe(&["powershell.exe", "-NoProfile", "-Command", chain]),
            "`{chain}` is not recognized by powershell.exe"
        );

        if let Some(pwsh) = pwsh_path() {
            assert!(
                is_safe(&[pwsh.as_str(), "-NoProfile", "-Command", chain]),
                "`{chain}` should be considered safe to pwsh.exe"
            );
        }
    }
}
|
||||
11
codex-rs/shell-command/src/lib.rs
Normal file
11
codex-rs/shell-command/src/lib.rs
Normal file
@@ -0,0 +1,11 @@
|
||||
//! Command parsing and safety utilities shared across Codex crates.
|
||||
|
||||
mod shell_detect;
|
||||
|
||||
pub mod bash;
|
||||
pub mod command_safety;
|
||||
pub mod parse_command;
|
||||
pub mod powershell;
|
||||
|
||||
pub use command_safety::is_dangerous_command;
|
||||
pub use command_safety::is_safe_command;
|
||||
2583
codex-rs/shell-command/src/parse_command.rs
Normal file
2583
codex-rs/shell-command/src/parse_command.rs
Normal file
File diff suppressed because it is too large
Load Diff
204
codex-rs/shell-command/src/powershell.rs
Normal file
204
codex-rs/shell-command/src/powershell.rs
Normal file
@@ -0,0 +1,204 @@
|
||||
use std::path::PathBuf;
|
||||
|
||||
use codex_utils_absolute_path::AbsolutePathBuf;
|
||||
|
||||
use crate::shell_detect::ShellType;
|
||||
use crate::shell_detect::detect_shell_type;
|
||||
|
||||
/// Lowercase spellings of the only flags `extract_powershell_command` accepts
/// between the shell executable and the script; any other flag makes the
/// extraction fail.
const POWERSHELL_FLAGS: &[&str] = &[
    "-nologo",
    "-noprofile",
    "-command",
    "-c",
];

/// Statement prepended to PowerShell scripts to force UTF-8 console output.
pub const UTF8_OUTPUT_PREFIX: &str =
    "[Console]::OutputEncoding=[System.Text.Encoding]::UTF8;\n";
|
||||
|
||||
pub fn prefix_powershell_script_with_utf8(command: &[String]) -> Vec<String> {
|
||||
let Some((_, script)) = extract_powershell_command(command) else {
|
||||
return command.to_vec();
|
||||
};
|
||||
|
||||
let trimmed = script.trim_start();
|
||||
let script = if trimmed.starts_with(UTF8_OUTPUT_PREFIX) {
|
||||
script.to_string()
|
||||
} else {
|
||||
format!("{UTF8_OUTPUT_PREFIX}{script}")
|
||||
};
|
||||
|
||||
let mut command: Vec<String> = command[..(command.len() - 1)]
|
||||
.iter()
|
||||
.map(std::string::ToString::to_string)
|
||||
.collect();
|
||||
command.push(script);
|
||||
command
|
||||
}
|
||||
|
||||
/// Extract the PowerShell script body from an invocation such as:
|
||||
///
|
||||
/// - ["pwsh", "-NoProfile", "-Command", "Get-ChildItem -Recurse | Select-String foo"]
|
||||
/// - ["powershell.exe", "-Command", "Write-Host hi"]
|
||||
/// - ["powershell", "-NoLogo", "-NoProfile", "-Command", "...script..."]
|
||||
///
|
||||
/// Returns (`shell`, `script`) when the first arg is a PowerShell executable and a
|
||||
/// `-Command` (or `-c`) flag is present followed by a script string.
|
||||
pub fn extract_powershell_command(command: &[String]) -> Option<(&str, &str)> {
|
||||
if command.len() < 3 {
|
||||
return None;
|
||||
}
|
||||
|
||||
let shell = &command[0];
|
||||
if !matches!(
|
||||
detect_shell_type(&PathBuf::from(shell)),
|
||||
Some(ShellType::PowerShell)
|
||||
) {
|
||||
return None;
|
||||
}
|
||||
|
||||
// Find the first occurrence of -Command (accept common short alias -c as well)
|
||||
let mut i = 1usize;
|
||||
while i + 1 < command.len() {
|
||||
let flag = &command[i];
|
||||
// Reject unknown flags
|
||||
if !POWERSHELL_FLAGS.contains(&flag.to_ascii_lowercase().as_str()) {
|
||||
return None;
|
||||
}
|
||||
if flag.eq_ignore_ascii_case("-Command") || flag.eq_ignore_ascii_case("-c") {
|
||||
let script = &command[i + 1];
|
||||
return Some((shell, script));
|
||||
}
|
||||
i += 1;
|
||||
}
|
||||
None
|
||||
}
|
||||
|
||||
/// Locates a usable PowerShell executable, preferring pwsh.exe and falling
/// back to powershell.exe when no pwsh.exe is found.
#[cfg(windows)]
#[allow(dead_code)]
pub(crate) fn try_find_powershellish_executable_blocking() -> Option<AbsolutePathBuf> {
    try_find_pwsh_executable_blocking().or_else(try_find_powershell_executable_blocking)
}
|
||||
|
||||
/// This function attempts to find a powershell.exe executable on the system.
|
||||
pub fn try_find_powershell_executable_blocking() -> Option<AbsolutePathBuf> {
|
||||
try_find_powershellish_executable_in_path(&["powershell.exe"])
|
||||
}
|
||||
|
||||
/// This function attempts to find a pwsh.exe executable on the system.
|
||||
/// Note that pwsh.exe and powershell.exe are different executables:
|
||||
///
|
||||
/// - pwsh.exe is the cross-platform PowerShell Core (v6+) executable
|
||||
/// - powershell.exe is the Windows PowerShell (v5.1 and earlier) executable
|
||||
///
|
||||
/// Further, while powershell.exe is included by default on Windows systems,
|
||||
/// pwsh.exe must be installed separately by the user. And even when the user
|
||||
/// has installed pwsh.exe, it may not be available in the system PATH, in which
|
||||
/// case we attempt to locate it via other means.
|
||||
pub fn try_find_pwsh_executable_blocking() -> Option<AbsolutePathBuf> {
|
||||
if let Some(ps_home) = std::process::Command::new("cmd")
|
||||
.args(["/C", "pwsh", "-NoProfile", "-Command", "$PSHOME"])
|
||||
.output()
|
||||
.ok()
|
||||
.and_then(|out| {
|
||||
if !out.status.success() {
|
||||
return None;
|
||||
}
|
||||
let stdout = String::from_utf8_lossy(&out.stdout);
|
||||
let trimmed = stdout.trim();
|
||||
(!trimmed.is_empty()).then(|| trimmed.to_string())
|
||||
})
|
||||
{
|
||||
let candidate = AbsolutePathBuf::resolve_path_against_base("pwsh.exe", &ps_home);
|
||||
|
||||
if let Ok(candidate_abs_path) = candidate
|
||||
&& is_powershellish_executable_available(candidate_abs_path.as_path())
|
||||
{
|
||||
return Some(candidate_abs_path);
|
||||
}
|
||||
}
|
||||
|
||||
try_find_powershellish_executable_in_path(&["pwsh.exe"])
|
||||
}
|
||||
|
||||
fn try_find_powershellish_executable_in_path(candidates: &[&str]) -> Option<AbsolutePathBuf> {
|
||||
for candidate in candidates {
|
||||
let Ok(resolved_path) = which::which(candidate) else {
|
||||
continue;
|
||||
};
|
||||
|
||||
if !is_powershellish_executable_available(&resolved_path) {
|
||||
continue;
|
||||
}
|
||||
|
||||
let Ok(abs_path) = AbsolutePathBuf::from_absolute_path(resolved_path) else {
|
||||
continue;
|
||||
};
|
||||
|
||||
return Some(abs_path);
|
||||
}
|
||||
|
||||
None
|
||||
}
|
||||
|
||||
/// Reports whether the given executable can be launched and exits
/// successfully when asked to run a trivial `Write-Output ok` command.
///
/// The same probe is used for both powershell.exe and pwsh.exe. A failure to
/// spawn the process counts as "not available".
fn is_powershellish_executable_available(powershell_or_pwsh_exe: &std::path::Path) -> bool {
    let probe = std::process::Command::new(powershell_or_pwsh_exe)
        .args(["-NoLogo", "-NoProfile", "-Command", "Write-Output ok"])
        .output();

    matches!(probe, Ok(output) if output.status.success())
}
|
||||
|
||||
#[cfg(test)]
mod tests {
    use super::extract_powershell_command;

    /// Builds an owned argv from string slices.
    fn argv(parts: &[&str]) -> Vec<String> {
        parts.iter().map(|part| (*part).to_string()).collect()
    }

    /// Returns the extracted script, failing the test if extraction fails.
    fn script_of(cmd: &[String]) -> &str {
        let (_shell, script) = extract_powershell_command(cmd).expect("extract");
        script
    }

    #[test]
    fn extracts_basic_powershell_command() {
        let cmd = argv(&["powershell", "-Command", "Write-Host hi"]);
        assert_eq!(script_of(&cmd), "Write-Host hi");
    }

    #[test]
    fn extracts_lowercase_flags() {
        let cmd = argv(&["powershell", "-nologo", "-command", "Write-Host hi"]);
        assert_eq!(script_of(&cmd), "Write-Host hi");
    }

    #[test]
    fn extracts_full_path_powershell_command() {
        // Use a path shape that the host platform parses into components.
        let shell = if cfg!(windows) {
            "C:\\windows\\System32\\WindowsPowerShell\\v1.0\\powershell.exe"
        } else {
            "/usr/local/bin/powershell.exe"
        };
        let cmd = argv(&[shell, "-Command", "Write-Host hi"]);
        assert_eq!(script_of(&cmd), "Write-Host hi");
    }

    #[test]
    fn extracts_with_noprofile_and_alias() {
        let cmd = argv(&[
            "pwsh",
            "-NoProfile",
            "-c",
            "Get-ChildItem | Select-String foo",
        ]);
        assert_eq!(script_of(&cmd), "Get-ChildItem | Select-String foo");
    }
}
|
||||
32
codex-rs/shell-command/src/shell_detect.rs
Normal file
32
codex-rs/shell-command/src/shell_detect.rs
Normal file
@@ -0,0 +1,32 @@
|
||||
use std::path::Path;
|
||||
use std::path::PathBuf;
|
||||
|
||||
/// Shell families that `detect_shell_type` can recognize from an executable
/// name or path.
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub(crate) enum ShellType {
    /// Detected from the name `zsh`.
    Zsh,
    /// Detected from the name `bash`.
    Bash,
    /// Detected from the names `pwsh` and `powershell`.
    PowerShell,
    /// Detected from the name `sh`.
    Sh,
    /// Detected from the name `cmd`.
    Cmd,
}
|
||||
|
||||
pub(crate) fn detect_shell_type(shell_path: &PathBuf) -> Option<ShellType> {
|
||||
match shell_path.as_os_str().to_str() {
|
||||
Some("zsh") => Some(ShellType::Zsh),
|
||||
Some("sh") => Some(ShellType::Sh),
|
||||
Some("cmd") => Some(ShellType::Cmd),
|
||||
Some("bash") => Some(ShellType::Bash),
|
||||
Some("pwsh") => Some(ShellType::PowerShell),
|
||||
Some("powershell") => Some(ShellType::PowerShell),
|
||||
_ => {
|
||||
let shell_name = shell_path.file_stem();
|
||||
if let Some(shell_name) = shell_name {
|
||||
let shell_name_path = Path::new(shell_name);
|
||||
if shell_name_path != Path::new(shell_path) {
|
||||
return detect_shell_type(&shell_name_path.to_path_buf());
|
||||
}
|
||||
}
|
||||
None
|
||||
}
|
||||
}
|
||||
}
|
||||
Reference in New Issue
Block a user