Files
codex/prs/bolinfest/PR-1680.md
2025-09-02 15:17:45 -07:00

5.4 KiB

PR #1680: Check ripgrep availability before suggesting it

Description

Summary

  • avoid recommending rg when it is not installed
  • detect rg once and adjust base instructions

Testing

  • just fmt (fails: cargo not found)
  • just fix (fails: cargo not found)
  • cargo test --all-features (fails: cargo not found)

https://chatgpt.com/codex/tasks/task_i_6883ac0090c4832cb8434edc024685b7

Full Diff

diff --git a/codex-rs/core/Cargo.toml b/codex-rs/core/Cargo.toml
index 62e462bf97..1db49fd21b 100644
--- a/codex-rs/core/Cargo.toml
+++ b/codex-rs/core/Cargo.toml
@@ -25,6 +25,7 @@ futures = "0.3"
 libc = "0.2.174"
 mcp-types = { path = "../mcp-types" }
 mime_guess = "2.0"
+once_cell = "1"
 rand = "0.9"
 reqwest = { version = "0.12", features = ["json", "stream"] }
 serde = { version = "1", features = ["derive"] }
diff --git a/codex-rs/core/src/client_common.rs b/codex-rs/core/src/client_common.rs
index afd2f04556..caf5a0af11 100644
--- a/codex-rs/core/src/client_common.rs
+++ b/codex-rs/core/src/client_common.rs
@@ -12,11 +12,35 @@ use std::pin::Pin;
 use std::task::Context;
 use std::task::Poll;
 use tokio::sync::mpsc;
+use once_cell::sync::Lazy;
+use std::process::Command;
+use std::process::Stdio;
 
 /// The `instructions` field in the payload sent to a model should always start
 /// with this content.
 const BASE_INSTRUCTIONS: &str = include_str!("../prompt.md");
 
+static RG_AVAILABLE: Lazy<bool> = Lazy::new(|| {
+    Command::new("rg")
+        .arg("--version")
+        .stdout(Stdio::null())
+        .stderr(Stdio::null())
+        .status()
+        .map(|s| s.success())
+        .unwrap_or(false)
+});
+
+const RG_LINE: &str = "- Do not use `ls -R`, `find`, or `grep` - these are slow in large repos. Use `rg` and `rg --files`.";
+const RG_LINE_NO_RG: &str = "- Do not use `ls -R`, `find`, or `grep` - these are slow in large repos.";
+
+fn base_instructions() -> Cow<'static, str> {
+    if *RG_AVAILABLE {
+        Cow::Borrowed(BASE_INSTRUCTIONS)
+    } else {
+        Cow::Owned(BASE_INSTRUCTIONS.replace(RG_LINE, RG_LINE_NO_RG))
+    }
+}
+
 /// API request payload for a single model turn.
 #[derive(Default, Debug, Clone)]
 pub struct Prompt {
@@ -42,8 +66,9 @@ impl Prompt {
         let base = self
             .base_instructions_override
             .as_deref()
-            .unwrap_or(BASE_INSTRUCTIONS);
-        let mut sections: Vec<&str> = vec![base];
+            .map(Cow::Borrowed)
+            .unwrap_or_else(|| base_instructions());
+        let mut sections: Vec<&str> = vec![base.as_ref()];
         if let Some(ref user) = self.user_instructions {
             sections.push(user);
         }

Review Comments

codex-rs/core/src/client_common.rs

@@ -12,11 +12,35 @@ use std::pin::Pin;
 use std::task::Context;
 use std::task::Poll;
 use tokio::sync::mpsc;
+use once_cell::sync::Lazy;
+use std::process::Command;
+use std::process::Stdio;
 
 /// The `instructions` field in the payload sent to a model should always start
 /// with this content.
 const BASE_INSTRUCTIONS: &str = include_str!("../prompt.md");
 
+static RG_AVAILABLE: Lazy<bool> = Lazy::new(|| {
+    Command::new("rg")
+        .arg("--version")
+        .stdout(Stdio::null())
+        .stderr(Stdio::null())
+        .status()
+        .map(|s| s.success())
+        .unwrap_or(false)
+});
+
+const RG_LINE: &str = "- Do not use `ls -R`, `find`, or `grep` - these are slow in large repos. Use `rg` and `rg --files`.";
+const RG_LINE_NO_RG: &str = "- Do not use `ls -R`, `find`, or `grep` - these are slow in large repos.";
+
+fn base_instructions() -> Cow<'static, str> {
+    if *RG_AVAILABLE {
+        Cow::Borrowed(BASE_INSTRUCTIONS)
+    } else {
+        Cow::Owned(BASE_INSTRUCTIONS.replace(RG_LINE, RG_LINE_NO_RG))

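I feel like we need a test to verify that RG_LINE exists in BASE_INSTRUCTIONS. Otherwise, if prompt.md changes, `replace` will silently find nothing to substitute and the no-`rg` variant will still recommend `rg`. With a test in place, anyone who updates prompt.md is forced to update this function as well.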

@@ -12,11 +12,35 @@ use std::pin::Pin;
 use std::task::Context;
 use std::task::Poll;
 use tokio::sync::mpsc;
+use once_cell::sync::Lazy;
+use std::process::Command;
+use std::process::Stdio;
 
 /// The `instructions` field in the payload sent to a model should always start
 /// with this content.
 const BASE_INSTRUCTIONS: &str = include_str!("../prompt.md");
 
+static RG_AVAILABLE: Lazy<bool> = Lazy::new(|| {
+    Command::new("rg")
+        .arg("--version")
+        .stdout(Stdio::null())
+        .stderr(Stdio::null())
+        .status()
+        .map(|s| s.success())
+        .unwrap_or(false)
+});
+
+const RG_LINE: &str = "- Do not use `ls -R`, `find`, or `grep` - these are slow in large repos. Use `rg` and `rg --files`.";
+const RG_LINE_NO_RG: &str = "- Do not use `ls -R`, `find`, or `grep` - these are slow in large repos.";
+
+fn base_instructions() -> Cow<'static, str> {
+    if *RG_AVAILABLE {
+        Cow::Borrowed(BASE_INSTRUCTIONS)
+    } else {
+        Cow::Owned(BASE_INSTRUCTIONS.replace(RG_LINE, RG_LINE_NO_RG))

Also, there appear to be backslashes in front of the backticks in prompt.md, so are we sure RG_LINE actually matches the text there? If it does not, the replacement is a no-op.