20 KiB
PR #1668: feat: expand the set of commands that can be safely identified as "trusted"
- URL: https://github.com/openai/codex/pull/1668
- Author: bolinfest
- Created: 2025-07-24 05:57:54 UTC
- Updated: 2025-07-24 21:13:37 UTC
- Changes: +291/-132, Files changed: 3, Commits: 1
Description
This PR updates is_known_safe_command() to account for "safe operators" to expand the set of commands that can be run without approval. This concept existed in the TypeScript CLI, and we are [finally!] porting it to the Rust one:
c9e2def494/codex-cli/src/approvals.ts (L531-L541)
The idea is that if we have EXPR1 SAFE_OP EXPR2 and EXPR1 and EXPR2 are considered safe independently, then EXPR1 SAFE_OP EXPR2 should be considered safe. Currently, SAFE_OP includes &&, ||, ;, and |.
In the TypeScript implementation, we relied on https://www.npmjs.com/package/shell-quote to parse the string of Bash, as it could provide a "lightweight" parse tree, parsing 'beep || boop > /byte' as:
[ 'beep', { op: '||' }, 'boop', { op: '>' }, '/byte' ]
Though in this PR, we introduce the use of https://crates.io/crates/tree-sitter-bash for parsing (which incidentally we were already using in codex-apply-patch), which gives us a richer parse tree. (Incidentally, if you have never played with tree-sitter, try the playground and select Bash from the dropdown to see how it parses various expressions.)
As a concrete example, prior to this change, our implementation of is_known_safe_command() could verify things like:
["bash", "-lc", "grep -R \"Cargo.toml\" -n"]
but not:
["bash", "-lc", "grep -R \"Cargo.toml\" -n || true"]
With this change, the version with || true is also accepted.
Admittedly, this PR does not expand the safety check to support subshells, so it would reject, e.g. bash -lc 'ls || (pwd && echo hi)', but that can be addressed in a subsequent PR.
Full Diff
diff --git a/codex-rs/core/src/bash.rs b/codex-rs/core/src/bash.rs
new file mode 100644
index 0000000000..b9cd444356
--- /dev/null
+++ b/codex-rs/core/src/bash.rs
@@ -0,0 +1,219 @@
+use tree_sitter::Parser;
+use tree_sitter::Tree;
+use tree_sitter_bash::LANGUAGE as BASH;
+
+/// Parse the provided bash source using tree-sitter-bash, returning a Tree on
+/// success or None if parsing failed.
+pub fn try_parse_bash(bash_lc_arg: &str) -> Option<Tree> {
+ let lang = BASH.into();
+ let mut parser = Parser::new();
+ #[expect(clippy::expect_used)]
+ parser.set_language(&lang).expect("load bash grammar");
+ let old_tree: Option<&Tree> = None;
+ parser.parse(bash_lc_arg, old_tree)
+}
+
+/// Parse a script which may contain multiple simple commands joined only by
+/// the safe logical/pipe/sequencing operators: `&&`, `||`, `;`, `|`.
+///
+/// Returns `Some(Vec<command_words>)` if every command is a plain word‑only
+/// command and the parse tree does not contain disallowed constructs
+/// (parentheses, redirections, substitutions, control flow, etc.). Otherwise
+/// returns `None`.
+pub fn try_parse_word_only_commands_sequence(tree: &Tree, src: &str) -> Option<Vec<Vec<String>>> {
+ if tree.root_node().has_error() {
+ return None;
+ }
+
+ // List of allowed (named) node kinds for a "word only commands sequence".
+ // If we encounter a named node that is not in this list we reject.
+ const ALLOWED_KINDS: &[&str] = &[
+ // top level containers
+ "program",
+ "list",
+ "pipeline",
+ // commands & words
+ "command",
+ "command_name",
+ "word",
+ "string",
+ "string_content",
+ "raw_string",
+ "number",
+ ];
+ // Allow only safe punctuation / operator tokens; anything else causes reject.
+ const ALLOWED_PUNCT_TOKENS: &[&str] = &["&&", "||", ";", "|", "\"", "'"];
+
+ let root = tree.root_node();
+ let mut cursor = root.walk();
+ let mut stack = vec![root];
+ let mut command_nodes = Vec::new();
+ while let Some(node) = stack.pop() {
+ let kind = node.kind();
+ if node.is_named() {
+ if !ALLOWED_KINDS.contains(&kind) {
+ return None;
+ }
+ if kind == "command" {
+ command_nodes.push(node);
+ }
+ } else {
+ // Reject any punctuation / operator tokens that are not explicitly allowed.
+ if kind.chars().any(|c| "&;|".contains(c)) && !ALLOWED_PUNCT_TOKENS.contains(&kind) {
+ return None;
+ }
+ if !(ALLOWED_PUNCT_TOKENS.contains(&kind) || kind.trim().is_empty()) {
+ // If it's a quote token or operator it's allowed above; we also allow whitespace tokens.
+ // Any other punctuation like parentheses, braces, redirects, backticks, etc are rejected.
+ return None;
+ }
+ }
+ for child in node.children(&mut cursor) {
+ stack.push(child);
+ }
+ }
+
+ let mut commands = Vec::new();
+ for node in command_nodes {
+ if let Some(words) = parse_plain_command_from_node(node, src) {
+ commands.push(words);
+ } else {
+ return None;
+ }
+ }
+ Some(commands)
+}
+
+fn parse_plain_command_from_node(cmd: tree_sitter::Node, src: &str) -> Option<Vec<String>> {
+ if cmd.kind() != "command" {
+ return None;
+ }
+ let mut words = Vec::new();
+ let mut cursor = cmd.walk();
+ for child in cmd.named_children(&mut cursor) {
+ match child.kind() {
+ "command_name" => {
+ let word_node = child.named_child(0)?;
+ if word_node.kind() != "word" {
+ return None;
+ }
+ words.push(word_node.utf8_text(src.as_bytes()).ok()?.to_owned());
+ }
+ "word" | "number" => {
+ words.push(child.utf8_text(src.as_bytes()).ok()?.to_owned());
+ }
+ "string" => {
+ if child.child_count() == 3
+ && child.child(0)?.kind() == "\""
+ && child.child(1)?.kind() == "string_content"
+ && child.child(2)?.kind() == "\""
+ {
+ words.push(child.child(1)?.utf8_text(src.as_bytes()).ok()?.to_owned());
+ } else {
+ return None;
+ }
+ }
+ "raw_string" => {
+ let raw_string = child.utf8_text(src.as_bytes()).ok()?;
+ let stripped = raw_string
+ .strip_prefix('\'')
+ .and_then(|s| s.strip_suffix('\''));
+ if let Some(s) = stripped {
+ words.push(s.to_owned());
+ } else {
+ return None;
+ }
+ }
+ _ => return None,
+ }
+ }
+ Some(words)
+}
+
+#[cfg(test)]
+mod tests {
+ #![allow(clippy::unwrap_used)]
+ use super::*;
+
+ fn parse_seq(src: &str) -> Option<Vec<Vec<String>>> {
+ let tree = try_parse_bash(src)?;
+ try_parse_word_only_commands_sequence(&tree, src)
+ }
+
+ #[test]
+ fn accepts_single_simple_command() {
+ let cmds = parse_seq("ls -1").unwrap();
+ assert_eq!(cmds, vec![vec!["ls".to_string(), "-1".to_string()]]);
+ }
+
+ #[test]
+ fn accepts_multiple_commands_with_allowed_operators() {
+ let src = "ls && pwd; echo 'hi there' | wc -l";
+ let cmds = parse_seq(src).unwrap();
+ let expected: Vec<Vec<String>> = vec![
+ vec!["wc".to_string(), "-l".to_string()],
+ vec!["echo".to_string(), "hi there".to_string()],
+ vec!["pwd".to_string()],
+ vec!["ls".to_string()],
+ ];
+ assert_eq!(cmds, expected);
+ }
+
+ #[test]
+ fn extracts_double_and_single_quoted_strings() {
+ let cmds = parse_seq("echo \"hello world\"").unwrap();
+ assert_eq!(
+ cmds,
+ vec![vec!["echo".to_string(), "hello world".to_string()]]
+ );
+
+ let cmds2 = parse_seq("echo 'hi there'").unwrap();
+ assert_eq!(
+ cmds2,
+ vec![vec!["echo".to_string(), "hi there".to_string()]]
+ );
+ }
+
+ #[test]
+ fn accepts_numbers_as_words() {
+ let cmds = parse_seq("echo 123 456").unwrap();
+ assert_eq!(
+ cmds,
+ vec![vec![
+ "echo".to_string(),
+ "123".to_string(),
+ "456".to_string()
+ ]]
+ );
+ }
+
+ #[test]
+ fn rejects_parentheses_and_subshells() {
+ assert!(parse_seq("(ls)").is_none());
+ assert!(parse_seq("ls || (pwd && echo hi)").is_none());
+ }
+
+ #[test]
+ fn rejects_redirections_and_unsupported_operators() {
+ assert!(parse_seq("ls > out.txt").is_none());
+ assert!(parse_seq("echo hi & echo bye").is_none());
+ }
+
+ #[test]
+ fn rejects_command_and_process_substitutions_and_expansions() {
+ assert!(parse_seq("echo $(pwd)").is_none());
+ assert!(parse_seq("echo `pwd`").is_none());
+ assert!(parse_seq("echo $HOME").is_none());
+ assert!(parse_seq("echo \"hi $USER\"").is_none());
+ }
+
+ #[test]
+ fn rejects_variable_assignment_prefix() {
+ assert!(parse_seq("FOO=bar ls").is_none());
+ }
+
+ #[test]
+ fn rejects_trailing_operator_parse_error() {
+ assert!(parse_seq("ls &&").is_none());
+ }
+}
diff --git a/codex-rs/core/src/is_safe_command.rs b/codex-rs/core/src/is_safe_command.rs
index 493650a4b5..f5f453f8d8 100644
--- a/codex-rs/core/src/is_safe_command.rs
+++ b/codex-rs/core/src/is_safe_command.rs
@@ -1,22 +1,34 @@
-use tree_sitter::Parser;
-use tree_sitter::Tree;
-use tree_sitter_bash::LANGUAGE as BASH;
+use crate::bash::try_parse_bash;
+use crate::bash::try_parse_word_only_commands_sequence;
pub fn is_known_safe_command(command: &[String]) -> bool {
if is_safe_to_call_with_exec(command) {
return true;
}
- // TODO(mbolin): Also support safe commands that are piped together such
- // as `cat foo | wc -l`.
- matches!(
- command,
- [bash, flag, script]
- if bash == "bash"
- && flag == "-lc"
- && try_parse_bash(script).and_then(|tree|
- try_parse_single_word_only_command(&tree, script)).is_some_and(|parsed_bash_command| is_safe_to_call_with_exec(&parsed_bash_command))
- )
+ // Support `bash -lc "..."` where the script consists solely of one or
+ // more "plain" commands (only bare words / quoted strings) combined with
+ // a conservative allow‑list of shell operators that themselves do not
+ // introduce side effects ( "&&", "||", ";", and "|" ). If every
+ // individual command in the script is itself a known‑safe command, then
+ // the composite expression is considered safe.
+ if let [bash, flag, script] = command {
+ if bash == "bash" && flag == "-lc" {
+ if let Some(tree) = try_parse_bash(script) {
+ if let Some(all_commands) = try_parse_word_only_commands_sequence(&tree, script) {
+ if !all_commands.is_empty()
+ && all_commands
+ .iter()
+ .all(|cmd| is_safe_to_call_with_exec(cmd))
+ {
+ return true;
+ }
+ }
+ }
+ }
+ }
+
+ false
}
fn is_safe_to_call_with_exec(command: &[String]) -> bool {
@@ -109,90 +121,7 @@ fn is_safe_to_call_with_exec(command: &[String]) -> bool {
}
}
-fn try_parse_bash(bash_lc_arg: &str) -> Option<Tree> {
- let lang = BASH.into();
- let mut parser = Parser::new();
- #[expect(clippy::expect_used)]
- parser.set_language(&lang).expect("load bash grammar");
-
- let old_tree: Option<&Tree> = None;
- parser.parse(bash_lc_arg, old_tree)
-}
-
-/// If `tree` represents a single Bash command whose name and every argument is
-/// an ordinary `word`, return those words in order; otherwise, return `None`.
-///
-/// `src` must be the exact source string that was parsed into `tree`, so we can
-/// extract the text for every node.
-pub fn try_parse_single_word_only_command(tree: &Tree, src: &str) -> Option<Vec<String>> {
- // Any parse error is an immediate rejection.
- if tree.root_node().has_error() {
- return None;
- }
-
- // (program …) with exactly one statement
- let root = tree.root_node();
- if root.kind() != "program" || root.named_child_count() != 1 {
- return None;
- }
-
- let cmd = root.named_child(0)?; // (command …)
- if cmd.kind() != "command" {
- return None;
- }
-
- let mut words = Vec::new();
- let mut cursor = cmd.walk();
-
- for child in cmd.named_children(&mut cursor) {
- match child.kind() {
- // The command name node wraps one `word` child.
- "command_name" => {
- let word_node = child.named_child(0)?; // make sure it's only a word
- if word_node.kind() != "word" {
- return None;
- }
- words.push(word_node.utf8_text(src.as_bytes()).ok()?.to_owned());
- }
- // Positional‑argument word (allowed).
- "word" | "number" => {
- words.push(child.utf8_text(src.as_bytes()).ok()?.to_owned());
- }
- "string" => {
- if child.child_count() == 3
- && child.child(0)?.kind() == "\""
- && child.child(1)?.kind() == "string_content"
- && child.child(2)?.kind() == "\""
- {
- words.push(child.child(1)?.utf8_text(src.as_bytes()).ok()?.to_owned());
- } else {
- // Anything else means the command is *not* plain words.
- return None;
- }
- }
- "concatenation" => {
- // TODO: Consider things like `'ab\'a'`.
- return None;
- }
- "raw_string" => {
- // Raw string is a single word, but we need to strip the quotes.
- let raw_string = child.utf8_text(src.as_bytes()).ok()?;
- let stripped = raw_string
- .strip_prefix('\'')
- .and_then(|s| s.strip_suffix('\''));
- if let Some(stripped) = stripped {
- words.push(stripped.to_owned());
- } else {
- return None;
- }
- }
- // Anything else means the command is *not* plain words.
- _ => return None,
- }
- }
-
- Some(words)
-}
+// (bash parsing helpers implemented in crate::bash)
/* ----------------------------------------------------------
Example
@@ -230,6 +159,7 @@ fn is_valid_sed_n_arg(arg: Option<&str>) -> bool {
_ => false,
}
}
+
#[cfg(test)]
mod tests {
#![allow(clippy::unwrap_used)]
@@ -353,6 +283,30 @@ mod tests {
])));
}
+ #[test]
+ fn bash_lc_safe_examples_with_operators() {
+ assert!(is_known_safe_command(&vec_str(&[
+ "bash",
+ "-lc",
+ "grep -R \"Cargo.toml\" -n || true"
+ ])));
+ assert!(is_known_safe_command(&vec_str(&[
+ "bash",
+ "-lc",
+ "ls && pwd"
+ ])));
+ assert!(is_known_safe_command(&vec_str(&[
+ "bash",
+ "-lc",
+ "echo 'hi' ; ls"
+ ])));
+ assert!(is_known_safe_command(&vec_str(&[
+ "bash",
+ "-lc",
+ "ls | wc -l"
+ ])));
+ }
+
#[test]
fn bash_lc_unsafe_examples() {
assert!(
@@ -366,44 +320,29 @@ mod tests {
assert!(
!is_known_safe_command(&vec_str(&["bash", "-lc", "find . -name file.txt -delete"])),
- "Unsafe find option should not be auto‑approved."
+ "Unsafe find option should not be auto-approved."
);
- }
- #[test]
- fn test_try_parse_single_word_only_command() {
- let script_with_single_quoted_string = "sed -n '1,5p' file.txt";
- let parsed_words = try_parse_bash(script_with_single_quoted_string)
- .and_then(|tree| {
- try_parse_single_word_only_command(&tree, script_with_single_quoted_string)
- })
- .unwrap();
- assert_eq!(
- vec![
- "sed".to_string(),
- "-n".to_string(),
- // Ensure the single quotes are properly removed.
- "1,5p".to_string(),
- "file.txt".to_string()
- ],
- parsed_words,
+ // Disallowed because of unsafe command in sequence.
+ assert!(
+ !is_known_safe_command(&vec_str(&["bash", "-lc", "ls && rm -rf /"])),
+ "Sequence containing unsafe command must be rejected"
);
- let script_with_number_arg = "ls -1";
- let parsed_words = try_parse_bash(script_with_number_arg)
- .and_then(|tree| try_parse_single_word_only_command(&tree, script_with_number_arg))
- .unwrap();
- assert_eq!(vec!["ls", "-1"], parsed_words,);
-
- let script_with_double_quoted_string_with_no_funny_stuff_arg = "grep -R \"Cargo.toml\" -n";
- let parsed_words = try_parse_bash(script_with_double_quoted_string_with_no_funny_stuff_arg)
- .and_then(|tree| {
- try_parse_single_word_only_command(
- &tree,
- script_with_double_quoted_string_with_no_funny_stuff_arg,
- )
- })
- .unwrap();
- assert_eq!(vec!["grep", "-R", "Cargo.toml", "-n"], parsed_words);
+ // Disallowed because of parentheses / subshell.
+ assert!(
+ !is_known_safe_command(&vec_str(&["bash", "-lc", "(ls)"])),
+ "Parentheses (subshell) are not provably safe with the current parser"
+ );
+ assert!(
+ !is_known_safe_command(&vec_str(&["bash", "-lc", "ls || (pwd && echo hi)"])),
+ "Nested parentheses are not provably safe with the current parser"
+ );
+
+ // Disallowed redirection.
+ assert!(
+ !is_known_safe_command(&vec_str(&["bash", "-lc", "ls > out.txt"])),
+ "> redirection should be rejected"
+ );
}
}
diff --git a/codex-rs/core/src/lib.rs b/codex-rs/core/src/lib.rs
index 4e69e94b55..12321e0abc 100644
--- a/codex-rs/core/src/lib.rs
+++ b/codex-rs/core/src/lib.rs
@@ -5,6 +5,7 @@
// the TUI or the tracing stack).
#![deny(clippy::print_stdout, clippy::print_stderr)]
+mod bash;
mod chat_completions;
mod client;
mod client_common;
Review Comments
codex-rs/core/src/bash.rs
- Created: 2025-07-24 21:05:51 UTC | Link: https://github.com/openai/codex/pull/1668#discussion_r2229574980
@@ -0,0 +1,219 @@
+use tree_sitter::Parser;
+use tree_sitter::Tree;
+use tree_sitter_bash::LANGUAGE as BASH;
+
+/// Parse the provided bash source using tree-sitter-bash, returning a Tree on
+/// success or None if parsing failed.
+pub fn try_parse_bash(bash_lc_arg: &str) -> Option<Tree> {
+ let lang = BASH.into();
+ let mut parser = Parser::new();
+ #[expect(clippy::expect_used)]
+ parser.set_language(&lang).expect("load bash grammar");
+ let old_tree: Option<&Tree> = None;
+ parser.parse(bash_lc_arg, old_tree)
+}
+
+/// Parse a script which may contain multiple simple commands joined only by
+/// the safe logical/pipe/sequencing operators: `&&`, `||`, `;`, `|`.
+///
+/// Returns `Some(Vec<command_words>)` if every command is a plain word‑only
+/// command and the parse tree does not contain disallowed constructs
+/// (parentheses, redirections, substitutions, control flow, etc.). Otherwise
+/// returns `None`.
+pub fn try_parse_word_only_commands_sequence(tree: &Tree, src: &str) -> Option<Vec<Vec<String>>> {
+ if tree.root_node().has_error() {
+ return None;
+ }
+
+ // List of allowed (named) node kinds for a "word only commands sequence".
+ // If we encounter a named node that is not in this list we reject.
+ const ALLOWED_KINDS: &[&str] = &[
The tree sitter API returns a
Stringfromnode.kind(), so it's not our API to control, unfortunately.