Files
codex/prs/bolinfest/PR-1730.md
2025-09-02 15:17:45 -07:00

46 KiB
Raw Blame History

PR #1730: agents.md path shown at tui launch

Description

Show the agents.md path on login, decouple finding agents.md from loading it, and add unit tests for agents.md discovery.

Preview:

with an agents.md

image

without

image

Full Diff

diff --git a/codex-rs/common/src/config_summary.rs b/codex-rs/common/src/config_summary.rs
index 39d524731f..616030a85a 100644
--- a/codex-rs/common/src/config_summary.rs
+++ b/codex-rs/common/src/config_summary.rs
@@ -1,4 +1,5 @@
 use codex_core::WireApi;
+use codex_core::agents_doc_path_string;
 use codex_core::config::Config;
 
 use crate::sandbox_summary::summarize_sandbox_policy;
@@ -7,6 +8,10 @@ use crate::sandbox_summary::summarize_sandbox_policy;
 pub fn create_config_summary_entries(config: &Config) -> Vec<(&'static str, String)> {
     let mut entries = vec![
         ("workdir", config.cwd.display().to_string()),
+        (
+            "agents.md",
+            agents_doc_path_string(config).unwrap_or_else(|| "none".to_string()),
+        ),
         ("model", config.model.clone()),
         ("provider", config.model_provider_id.clone()),
         ("approval", config.approval_policy.to_string()),
diff --git a/codex-rs/core/src/codex.rs b/codex-rs/core/src/codex.rs
index 98d13b4cd6..64f0cb2bab 100644
--- a/codex-rs/core/src/codex.rs
+++ b/codex-rs/core/src/codex.rs
@@ -857,6 +857,7 @@ async fn submission_loop(
                     msg: EventMsg::SessionConfigured(SessionConfiguredEvent {
                         session_id,
                         model,
+                        agents_doc_path: crate::project_doc::agents_doc_path_string(&config),
                         history_log_id,
                         history_entry_count,
                     }),
diff --git a/codex-rs/core/src/lib.rs b/codex-rs/core/src/lib.rs
index c728bd3125..037476be0c 100644
--- a/codex-rs/core/src/lib.rs
+++ b/codex-rs/core/src/lib.rs
@@ -39,6 +39,8 @@ mod openai_model_info;
 mod openai_tools;
 pub mod plan_tool;
 mod project_doc;
+pub use project_doc::agents_doc_path_string;
+pub use project_doc::discover_project_doc_path;
 pub mod protocol;
 mod rollout;
 pub(crate) mod safety;
diff --git a/codex-rs/core/src/project_doc.rs b/codex-rs/core/src/project_doc.rs
index 9f46159d1d..56807da1ce 100644
--- a/codex-rs/core/src/project_doc.rs
+++ b/codex-rs/core/src/project_doc.rs
@@ -12,7 +12,10 @@
 //!     exists, the search stops — we do **not** walk past the Git root.
 
 use crate::config::Config;
+use std::fs;
+use std::io::Read as _;
 use std::path::Path;
+use std::path::PathBuf;
 use tokio::io::AsyncReadExt;
 use tracing::error;
 
@@ -49,6 +52,10 @@ pub(crate) async fn get_user_instructions(config: &Config) -> Option<String> {
 /// the function returns `Ok(None)`. Unexpected I/O failures bubble up as
 /// `Err` so callers can decide how to handle them.
 async fn find_project_doc(config: &Config) -> std::io::Result<Option<String>> {
+    if config.project_doc_max_bytes == 0 {
+        return Ok(None);
+    }
+
     let max_bytes = config.project_doc_max_bytes;
 
     // Attempt to load from the working directory first.
@@ -90,6 +97,124 @@ async fn find_project_doc(config: &Config) -> std::io::Result<Option<String>> {
     Ok(None)
 }
 
+/// Public helper that returns the discovered AGENTS.md path.
+/// Returns `Ok(None)` when no suitable file is found or `project_doc_max_bytes == 0`.
+pub fn discover_project_doc_path(config: &Config) -> std::io::Result<Option<std::path::PathBuf>> {
+    if config.project_doc_max_bytes == 0 {
+        return Ok(None);
+    }
+
+    discover_project_doc_path_from_dir(&config.cwd, CANDIDATE_FILENAMES)
+}
+
+fn discover_project_doc_path_from_dir(
+    start_dir: &Path,
+    names: &[&str],
+) -> std::io::Result<Option<std::path::PathBuf>> {
+    // Canonicalize the path so that we do not end up in an infinite loop when
+    // `cwd` contains `..` components.
+    let mut dir = start_dir.to_path_buf();
+    if let Ok(canon) = dir.canonicalize() {
+        dir = canon;
+    }
+
+    // Attempt in the working directory first.
+    if let Some(path) = first_nonempty_candidate_in_dir(&dir, names) {
+        return Ok(Some(path));
+    }
+
+    // Walk up towards the filesystem root, stopping once we encounter the Git root.
+    while let Some(parent) = dir.parent() {
+        let git_marker = dir.join(".git");
+        let git_exists = match fs::metadata(&git_marker) {
+            Ok(_) => true,
+            Err(e) if e.kind() == std::io::ErrorKind::NotFound => false,
+            Err(e) => return Err(e),
+        };
+
+        if git_exists {
+            if let Some(path) = first_nonempty_candidate_in_dir(&dir, names) {
+                return Ok(Some(path));
+            }
+            break; // do not walk past the Git root
+        }
+
+        dir = parent.to_path_buf();
+    }
+
+    Ok(None)
+}
+
+/// Return a human-readable description of the AGENTS.md path(s) that will be
+/// loaded for this session, or `None` if neither global nor project docs are
+/// present.
+///
+/// This mirrors the discovery logic used by `get_user_instructions()`:
+/// - If `~/.codex/AGENTS.md` (global) is non-empty, it is included.
+/// - If a project AGENTS.md is found (respecting the byte limit and git root
+///   stop), it is included.
+/// - When the project_doc_max_bytes is set to 0, project docs are disabled.
+pub fn agents_doc_path_string(config: &Config) -> Option<String> {
+    let mut parts: Vec<String> = Vec::new();
+
+    // Global AGENTS.md in CODEX_HOME.
+    if config.user_instructions.is_some() {
+        let global = config.codex_home.join("AGENTS.md");
+        parts.push(global.display().to_string());
+    }
+
+    // Project AGENTS.md, unless disabled via byte-limit == 0.
+    if config.project_doc_max_bytes > 0 {
+        if let Ok(Some(p)) = discover_project_doc_path(config) {
+            parts.push(p.display().to_string());
+        }
+    }
+
+    if parts.is_empty() {
+        None
+    } else {
+        Some(parts.join(" + "))
+    }
+}
+
+fn first_nonempty_candidate_in_dir(dir: &Path, names: &[&str]) -> Option<PathBuf> {
+    for name in names {
+        let candidate = dir.join(name);
+        // Fast path: must exist and be a file.
+        let md = match std::fs::metadata(&candidate) {
+            Ok(m) if m.is_file() => m,
+            _ => continue,
+        };
+
+        // If the file is zero bytes, skip without reading.
+        if md.len() == 0 {
+            continue;
+        }
+
+        // Read up to a modest limit to determine if the contents are effectively empty after trimming.
+        // Use the same limit as `project_doc_max_bytes` would permit by default.
+        const MAX_PEEK_BYTES: usize = 8 * 1024;
+        let mut file = match std::fs::File::open(&candidate) {
+            Ok(f) => f,
+            Err(_) => continue,
+        };
+        let mut buf = Vec::with_capacity(std::cmp::min(md.len() as usize, MAX_PEEK_BYTES));
+        if std::io::Read::by_ref(&mut file)
+            .take(MAX_PEEK_BYTES as u64)
+            .read_to_end(&mut buf)
+            .is_err()
+        {
+            continue;
+        }
+        let s = String::from_utf8_lossy(&buf);
+        if s.trim().is_empty() {
+            continue;
+        }
+        return Some(candidate);
+    }
+    None
+}
+
 /// Attempt to load the first candidate file found in `dir`. Returns the file
 /// contents (truncated if it exceeds `max_bytes`) when successful.
 async fn load_first_candidate(
@@ -237,6 +362,68 @@ mod tests {
         assert_eq!(res, "root level doc");
     }
 
+    /// Test if AGENTS.md located in the current working directory is preferred over the repo root.
+    #[tokio::test]
+    async fn prefers_cwd_doc_over_repo_root() {
+        let repo = tempfile::tempdir().expect("tempdir");
+
+        // Simulate a git repository at repo root.
+        std::fs::write(repo.path().join(".git"), "gitdir: /dev/null\n").unwrap();
+
+        // Create AGENTS.md at repo root and in a nested cwd.
+        fs::write(repo.path().join("AGENTS.md"), "root level doc").unwrap();
+        let nested = repo.path().join("workspace/crate_b");
+        std::fs::create_dir_all(&nested).unwrap();
+        fs::write(nested.join("AGENTS.md"), "nested cwd doc").unwrap();
+
+        // Build config pointing at the nested dir.
+        let mut cfg = make_config(&repo, 4096, None);
+        cfg.cwd = nested.clone();
+
+        // Path discovery should prefer the nested cwd doc.
+        let discovered = super::discover_project_doc_path(&cfg)
+            .expect("discovery should succeed")
+            .expect("path should be found");
+        let discovered_canon = fs::canonicalize(&discovered).expect("canonicalize discovered");
+        let expected_canon =
+            fs::canonicalize(nested.join("AGENTS.md")).expect("canonicalize expected");
+        assert_eq!(discovered_canon, expected_canon);
+
+        // get_user_instructions should load the nested document contents.
+        let res = get_user_instructions(&cfg).await.expect("doc expected");
+        assert_eq!(res, "nested cwd doc");
+    }
+
+    /// Test if AGENTS.md at the repo root is used when none exists in cwd.
+    #[tokio::test]
+    async fn falls_back_to_repo_root_when_cwd_missing_doc() {
+        let repo = tempfile::tempdir().expect("tempdir");
+
+        // Simulate a git repository at repo root.
+        std::fs::write(repo.path().join(".git"), "gitdir: /dev/null\n").unwrap();
+
+        // Create AGENTS.md only at repo root.
+        fs::write(repo.path().join("AGENTS.md"), "root level doc").unwrap();
+
+        // Nested cwd without its own AGENTS.md.
+        let nested = repo.path().join("nested/dir");
+        std::fs::create_dir_all(&nested).unwrap();
+
+        let mut cfg = make_config(&repo, 4096, None);
+        cfg.cwd = nested;
+
+        let discovered = super::discover_project_doc_path(&cfg)
+            .expect("discovery should succeed")
+            .expect("path should be found");
+        let discovered_canon = fs::canonicalize(&discovered).expect("canonicalize discovered");
+        let expected_canon =
+            fs::canonicalize(repo.path().join("AGENTS.md")).expect("canonicalize expected");
+        assert_eq!(discovered_canon, expected_canon);
+
+        let res = get_user_instructions(&cfg).await.expect("doc expected");
+        assert_eq!(res, "root level doc");
+    }
+
     /// Explicitly setting the byte-limit to zero disables project docs.
     #[tokio::test]
     async fn zero_byte_limit_disables_docs() {
diff --git a/codex-rs/core/src/protocol.rs b/codex-rs/core/src/protocol.rs
index 052806dd97..b861bda7e8 100644
--- a/codex-rs/core/src/protocol.rs
+++ b/codex-rs/core/src/protocol.rs
@@ -648,6 +648,10 @@ pub struct SessionConfiguredEvent {
     /// Tell the client what model is being queried.
     pub model: String,
 
+    /// The path(s) to AGENTS.md that were loaded for this session, if any.
+    #[serde(skip_serializing_if = "Option::is_none")]
+    pub agents_doc_path: Option<String>,
+
     /// Identifier of the history log file (inode on Unix, 0 otherwise).
     pub history_log_id: u64,
 
@@ -713,6 +717,7 @@ mod tests {
             msg: EventMsg::SessionConfigured(SessionConfiguredEvent {
                 session_id,
                 model: "codex-mini-latest".to_string(),
+                agents_doc_path: None,
                 history_log_id: 0,
                 history_entry_count: 0,
             }),
diff --git a/codex-rs/exec/src/event_processor_with_human_output.rs b/codex-rs/exec/src/event_processor_with_human_output.rs
index 6b03ed7882..2e4a206357 100644
--- a/codex-rs/exec/src/event_processor_with_human_output.rs
+++ b/codex-rs/exec/src/event_processor_with_human_output.rs
@@ -494,6 +494,7 @@ impl EventProcessor for EventProcessorWithHumanOutput {
                     model,
                     history_log_id: _,
                     history_entry_count: _,
+                    ..
                 } = session_configured_event;
 
                 ts_println!(
diff --git a/codex-rs/mcp-server/src/mcp_protocol.rs b/codex-rs/mcp-server/src/mcp_protocol.rs
index 2f8858a37b..8104d4ac3e 100644
--- a/codex-rs/mcp-server/src/mcp_protocol.rs
+++ b/codex-rs/mcp-server/src/mcp_protocol.rs
@@ -906,6 +906,7 @@ mod tests {
             msg: EventMsg::SessionConfigured(codex_core::protocol::SessionConfiguredEvent {
                 session_id: uuid!("67e55044-10b1-426f-9247-bb680e5fe0c8"),
                 model: "codex-mini-latest".into(),
+                agents_doc_path: None,
                 history_log_id: 42,
                 history_entry_count: 3,
             }),
diff --git a/codex-rs/mcp-server/src/outgoing_message.rs b/codex-rs/mcp-server/src/outgoing_message.rs
index e7b0b9b63c..74079d3169 100644
--- a/codex-rs/mcp-server/src/outgoing_message.rs
+++ b/codex-rs/mcp-server/src/outgoing_message.rs
@@ -242,6 +242,7 @@ mod tests {
             msg: EventMsg::SessionConfigured(SessionConfiguredEvent {
                 session_id: Uuid::new_v4(),
                 model: "gpt-4o".to_string(),
+                agents_doc_path: None,
                 history_log_id: 1,
                 history_entry_count: 1000,
             }),
@@ -282,6 +283,7 @@ mod tests {
         let session_configured_event = SessionConfiguredEvent {
             session_id: Uuid::new_v4(),
             model: "gpt-4o".to_string(),
+            agents_doc_path: None,
             history_log_id: 1,
             history_entry_count: 1000,
         };
diff --git a/codex-rs/tui/src/history_cell.rs b/codex-rs/tui/src/history_cell.rs
index c577ce17a0..366046eca5 100644
--- a/codex-rs/tui/src/history_cell.rs
+++ b/codex-rs/tui/src/history_cell.rs
@@ -166,12 +166,8 @@ impl HistoryCell {
         event: SessionConfiguredEvent,
         is_first_event: bool,
     ) -> Self {
-        let SessionConfiguredEvent {
-            model,
-            session_id: _,
-            history_log_id: _,
-            history_entry_count: _,
-        } = event;
+        let model = event.model.clone();
+        let agents_doc_path = event.agents_doc_path.clone();
         if is_first_event {
             let cwd_str = match relativize_to_home(&config.cwd) {
                 Some(rel) if !rel.as_os_str().is_empty() => format!("~/{}", rel.display()),
@@ -179,7 +175,7 @@ impl HistoryCell {
                 None => config.cwd.display().to_string(),
             };
 
-            let lines: Vec<Line<'static>> = vec![
+            let mut lines: Vec<Line<'static>> = vec![
                 Line::from(vec![
                     Span::raw(">_ ").dim(),
                     Span::styled(
@@ -189,14 +185,78 @@ impl HistoryCell {
                     Span::raw(format!(" {cwd_str}")).dim(),
                 ]),
                 Line::from("".dim()),
-                Line::from(" Try one of the following commands to get started:".dim()),
-                Line::from("".dim()),
-                Line::from(format!(" 1. /init - {}", SlashCommand::Init.description()).dim()),
-                Line::from(format!(" 2. /status - {}", SlashCommand::Status.description()).dim()),
-                Line::from(format!(" 3. /compact - {}", SlashCommand::Compact.description()).dim()),
-                Line::from(format!(" 4. /new - {}", SlashCommand::New.description()).dim()),
-                Line::from("".dim()),
             ];
+
+            // If AGENTS.md is configured (either user-level or project-level),
+            // show a concise summary and omit the /init hint, but still show the other onboarding commands.
+            let show_init = agents_doc_path.is_none();
+
+            if let Some(paths_str) = agents_doc_path {
+                let global_path = config.codex_home.join("AGENTS.md");
+                let parts: Vec<String> = paths_str
+                    .split(" + ")
+                    .map(|s| s.trim().to_string())
+                    .filter(|s| !s.is_empty())
+                    .collect();
+
+                let mut user_path: Option<String> = None;
+                let mut project_path: Option<String> = None;
+                for p in parts {
+                    if p == global_path.display().to_string() {
+                        user_path = Some(p);
+                    } else {
+                        project_path = Some(p);
+                    }
+                }
+
+                let summary_line = match (user_path, project_path) {
+                    (Some(u), Some(pr)) => {
+                        format!(" Using user instructions ({u}) and project instructions ({pr})")
+                    }
+                    (Some(u), None) => format!("Using user instructions ({u})"),
+                    (None, Some(pr)) => format!(" Using project instructions ({pr})"),
+                    (None, None) => String::new(),
+                };
+
+                if !summary_line.is_empty() {
+                    lines.push(Line::from(summary_line.dim()));
+                    lines.push(Line::from("".dim()));
+                }
+            }
+
+            // Onboarding hints, with index based on whether /init is shown
+            lines.push(Line::from(
+                " Try one of the following commands to get started:".dim(),
+            ));
+            lines.push(Line::from("".dim()));
+
+            let mut cmd_index = 1;
+            if show_init {
+                lines.push(Line::from(
+                    format!(" {cmd_index}. /init - {}", SlashCommand::Init.description()).dim(),
+                ));
+                cmd_index += 1;
+            }
+            lines.push(Line::from(
+                format!(
+                    " {cmd_index}. /status - {}",
+                    SlashCommand::Status.description()
+                )
+                .dim(),
+            ));
+            cmd_index += 1;
+            lines.push(Line::from(
+                format!(
+                    " {cmd_index}. /compact - {}",
+                    SlashCommand::Compact.description()
+                )
+                .dim(),
+            ));
+            cmd_index += 1;
+            lines.push(Line::from(
+                format!(" {cmd_index}. /new - {}", SlashCommand::New.description()).dim(),
+            ));
+            lines.push(Line::from("".dim()));
             HistoryCell::WelcomeMessage {
                 view: TextBlock::new(lines),
             }

Review Comments

codex-rs/core/src/project_doc.rs

@@ -90,48 +79,94 @@ async fn find_project_doc(config: &Config) -> std::io::Result<Option<String>> {
     Ok(None)
 }
 
-/// Attempt to load the first candidate file found in `dir`. Returns the file
-/// contents (truncated if it exceeds `max_bytes`) when successful.
-async fn load_first_candidate(
+fn find_non_empty_candidate(
     dir: &Path,
     names: &[&str],
     max_bytes: usize,
-) -> std::io::Result<Option<String>> {
+) -> std::io::Result<Option<std::path::PathBuf>> {
+    use std::fs::File;
+    use std::io::Read;
+
     for name in names {
         let candidate = dir.join(name);
-
-        let file = match tokio::fs::File::open(&candidate).await {
+        let mut file = match File::open(&candidate) {
             Err(e) if e.kind() == std::io::ErrorKind::NotFound => continue,
             Err(e) => return Err(e),
             Ok(f) => f,
         };
 
-        let size = file.metadata().await?.len();
-
-        let reader = tokio::io::BufReader::new(file);
-        let mut data = Vec::with_capacity(std::cmp::min(size as usize, max_bytes));
-        let mut limited = reader.take(max_bytes as u64);
-        limited.read_to_end(&mut data).await?;
-
-        if size as usize > max_bytes {
-            tracing::warn!(
-                "Project doc `{}` exceeds {max_bytes} bytes - truncating.",
-                candidate.display(),
-            );
-        }
-
-        let contents = String::from_utf8_lossy(&data).to_string();
+        let size = file.metadata()?.len() as usize;
+        let to_read = std::cmp::min(size, max_bytes);
+        let mut data = vec![0u8; to_read];
+        let read_n = file.read(&mut data)?;
+        let contents = String::from_utf8_lossy(&data[..read_n]).to_string();
         if contents.trim().is_empty() {
-            // Empty file — treat as not found.
             continue;
         }
 
-        return Ok(Some(contents));
+        return Ok(Some(candidate));
     }
 
     Ok(None)
 }
 
+/// Combines `Config::instructions` and `AGENTS.md` (if present) into a single
+/// string of instructions.
+pub(crate) async fn get_user_instructions(config: &Config) -> Option<String> {
+    match find_project_doc(config).await {
+        Ok(Some(project_doc)) => match &config.user_instructions {
+            Some(original_instructions) => Some(format!(
+                "{original_instructions}{PROJECT_DOC_SEPARATOR}{project_doc}"
+            )),
+            None => Some(project_doc),
+        },
+        Ok(None) => config.user_instructions.clone(),
+        Err(e) => {
+            error!("error trying to find project doc: {e:#}");
+            config.user_instructions.clone()
+        }
+    }
+}
+
+/// Attempt to locate and load the project documentation. Currently, the search
+/// starts from `Config::cwd`, but if we may want to consider other directories
+/// in the future, e.g., additional writable directories in the `SandboxPolicy`.
+///
+/// On success returns `Ok(Some(contents))`. If no documentation file is found
+/// the function returns `Ok(None)`. Unexpected I/O failures bubble up as
+/// `Err` so callers can decide how to handle them.
+async fn find_project_doc(config: &Config) -> std::io::Result<Option<String>> {
+    use tokio::io::BufReader;

Please move to the top of the file.

@@ -90,48 +79,94 @@ async fn find_project_doc(config: &Config) -> std::io::Result<Option<String>> {
     Ok(None)
 }
 
-/// Attempt to load the first candidate file found in `dir`. Returns the file
-/// contents (truncated if it exceeds `max_bytes`) when successful.
-async fn load_first_candidate(
+fn find_non_empty_candidate(
     dir: &Path,
     names: &[&str],
     max_bytes: usize,
-) -> std::io::Result<Option<String>> {
+) -> std::io::Result<Option<std::path::PathBuf>> {
+    use std::fs::File;

Move these to the top.

@@ -23,65 +23,54 @@ const CANDIDATE_FILENAMES: &[&str] = &["AGENTS.md"];
 /// be concatenated with the following separator.
 const PROJECT_DOC_SEPARATOR: &str = "\n\n--- project-doc ---\n\n";
 
-/// Combines `Config::instructions` and `AGENTS.md` (if present) into a single
-/// string of instructions.
-pub(crate) async fn get_user_instructions(config: &Config) -> Option<String> {
-    match find_project_doc(config).await {
-        Ok(Some(project_doc)) => match &config.user_instructions {
-            Some(original_instructions) => Some(format!(
-                "{original_instructions}{PROJECT_DOC_SEPARATOR}{project_doc}"
-            )),
-            None => Some(project_doc),
-        },
-        Ok(None) => config.user_instructions.clone(),
-        Err(e) => {
-            error!("error trying to find project doc: {e:#}");
-            config.user_instructions.clone()
-        }
+/// Public helper that returns the discovered AGENTS.md path.
+/// Returns `Ok(None)` when no suitable file is found or
+/// `project_doc_max_bytes == 0`.
+pub fn discover_project_doc_path(config: &Config) -> std::io::Result<Option<std::path::PathBuf>> {
+    if config.project_doc_max_bytes == 0 {
+        return Ok(None);
     }
-}
 
-/// Attempt to locate and load the project documentation. Currently, the search
-/// starts from `Config::cwd`, but if we may want to consider other directories
-/// in the future, e.g., additional writable directories in the `SandboxPolicy`.
-///
-/// On success returns `Ok(Some(contents))`. If no documentation file is found
-/// the function returns `Ok(None)`. Unexpected I/O failures bubble up as
-/// `Err` so callers can decide how to handle them.
-async fn find_project_doc(config: &Config) -> std::io::Result<Option<String>> {
-    let max_bytes = config.project_doc_max_bytes;
-
-    // Attempt to load from the working directory first.
-    if let Some(doc) = load_first_candidate(&config.cwd, CANDIDATE_FILENAMES, max_bytes).await? {
-        return Ok(Some(doc));
-    }
+    discover_project_doc_path_from_dir(
+        &config.cwd,
+        CANDIDATE_FILENAMES,
+        config.project_doc_max_bytes,
+    )

Arguably, using an expression is more idiomatic (applies to lines 30-38, but GitHub wouldn't let me select it). Also, I would take this opportunity to list the more common case first.

    if config.project_doc_max_bytes > 0 {
        discover_project_doc_path_from_dir(
            &config.cwd,
            CANDIDATE_FILENAMES,
            config.project_doc_max_bytes,
        )
    } else {
        Ok(None)
    }
@@ -23,65 +23,54 @@ const CANDIDATE_FILENAMES: &[&str] = &["AGENTS.md"];
 /// be concatenated with the following separator.
 const PROJECT_DOC_SEPARATOR: &str = "\n\n--- project-doc ---\n\n";
 
-/// Combines `Config::instructions` and `AGENTS.md` (if present) into a single
-/// string of instructions.
-pub(crate) async fn get_user_instructions(config: &Config) -> Option<String> {
-    match find_project_doc(config).await {
-        Ok(Some(project_doc)) => match &config.user_instructions {
-            Some(original_instructions) => Some(format!(
-                "{original_instructions}{PROJECT_DOC_SEPARATOR}{project_doc}"
-            )),
-            None => Some(project_doc),
-        },
-        Ok(None) => config.user_instructions.clone(),
-        Err(e) => {
-            error!("error trying to find project doc: {e:#}");
-            config.user_instructions.clone()
-        }
+/// Public helper that returns the discovered AGENTS.md path.
+/// Returns `Ok(None)` when no suitable file is found or
+/// `project_doc_max_bytes == 0`.
+pub fn discover_project_doc_path(config: &Config) -> std::io::Result<Option<std::path::PathBuf>> {
+    if config.project_doc_max_bytes == 0 {
+        return Ok(None);
     }
-}
 
-/// Attempt to locate and load the project documentation. Currently, the search
-/// starts from `Config::cwd`, but if we may want to consider other directories
-/// in the future, e.g., additional writable directories in the `SandboxPolicy`.
-///
-/// On success returns `Ok(Some(contents))`. If no documentation file is found
-/// the function returns `Ok(None)`. Unexpected I/O failures bubble up as
-/// `Err` so callers can decide how to handle them.
-async fn find_project_doc(config: &Config) -> std::io::Result<Option<String>> {
-    let max_bytes = config.project_doc_max_bytes;
-
-    // Attempt to load from the working directory first.
-    if let Some(doc) = load_first_candidate(&config.cwd, CANDIDATE_FILENAMES, max_bytes).await? {
-        return Ok(Some(doc));
-    }
+    discover_project_doc_path_from_dir(
+        &config.cwd,
+        CANDIDATE_FILENAMES,
+        config.project_doc_max_bytes,
+    )
+}
 
-    // Walk up towards the filesystem root, stopping once we encounter the Git
-    // repository root. The presence of **either** a `.git` *file* or
-    // *directory* counts.
-    let mut dir = config.cwd.clone();
+fn discover_project_doc_path_from_dir(
+    start_dir: &Path,
+    names: &[&str],
+    max_bytes: usize,
+) -> std::io::Result<Option<std::path::PathBuf>> {
+    use std::fs;

to top

@@ -23,65 +23,54 @@ const CANDIDATE_FILENAMES: &[&str] = &["AGENTS.md"];
 /// be concatenated with the following separator.
 const PROJECT_DOC_SEPARATOR: &str = "\n\n--- project-doc ---\n\n";
 
-/// Combines `Config::instructions` and `AGENTS.md` (if present) into a single
-/// string of instructions.
-pub(crate) async fn get_user_instructions(config: &Config) -> Option<String> {
-    match find_project_doc(config).await {
-        Ok(Some(project_doc)) => match &config.user_instructions {
-            Some(original_instructions) => Some(format!(
-                "{original_instructions}{PROJECT_DOC_SEPARATOR}{project_doc}"
-            )),
-            None => Some(project_doc),
-        },
-        Ok(None) => config.user_instructions.clone(),
-        Err(e) => {
-            error!("error trying to find project doc: {e:#}");
-            config.user_instructions.clone()
-        }
+/// Public helper that returns the discovered AGENTS.md path.
+/// Returns `Ok(None)` when no suitable file is found or
+/// `project_doc_max_bytes == 0`.
+pub fn discover_project_doc_path(config: &Config) -> std::io::Result<Option<std::path::PathBuf>> {
+    if config.project_doc_max_bytes == 0 {
+        return Ok(None);
     }
-}
 
-/// Attempt to locate and load the project documentation. Currently, the search
-/// starts from `Config::cwd`, but if we may want to consider other directories
-/// in the future, e.g., additional writable directories in the `SandboxPolicy`.
-///
-/// On success returns `Ok(Some(contents))`. If no documentation file is found
-/// the function returns `Ok(None)`. Unexpected I/O failures bubble up as
-/// `Err` so callers can decide how to handle them.
-async fn find_project_doc(config: &Config) -> std::io::Result<Option<String>> {
-    let max_bytes = config.project_doc_max_bytes;
-
-    // Attempt to load from the working directory first.
-    if let Some(doc) = load_first_candidate(&config.cwd, CANDIDATE_FILENAMES, max_bytes).await? {
-        return Ok(Some(doc));
-    }
+    discover_project_doc_path_from_dir(
+        &config.cwd,
+        CANDIDATE_FILENAMES,
+        config.project_doc_max_bytes,
+    )
+}
 
-    // Walk up towards the filesystem root, stopping once we encounter the Git
-    // repository root. The presence of **either** a `.git` *file* or
-    // *directory* counts.
-    let mut dir = config.cwd.clone();
+fn discover_project_doc_path_from_dir(
+    start_dir: &Path,
+    names: &[&str],
+    max_bytes: usize,
+) -> std::io::Result<Option<std::path::PathBuf>> {
+    use std::fs;
 
     // Canonicalize the path so that we do not end up in an infinite loop when
     // `cwd` contains `..` components.
    // `start_dir` contains `..` components.
@@ -23,65 +23,54 @@ const CANDIDATE_FILENAMES: &[&str] = &["AGENTS.md"];
 /// be concatenated with the following separator.
 const PROJECT_DOC_SEPARATOR: &str = "\n\n--- project-doc ---\n\n";
 
-/// Combines `Config::instructions` and `AGENTS.md` (if present) into a single
-/// string of instructions.
-pub(crate) async fn get_user_instructions(config: &Config) -> Option<String> {
-    match find_project_doc(config).await {
-        Ok(Some(project_doc)) => match &config.user_instructions {
-            Some(original_instructions) => Some(format!(
-                "{original_instructions}{PROJECT_DOC_SEPARATOR}{project_doc}"
-            )),
-            None => Some(project_doc),
-        },
-        Ok(None) => config.user_instructions.clone(),
-        Err(e) => {
-            error!("error trying to find project doc: {e:#}");
-            config.user_instructions.clone()
-        }
+/// Public helper that returns the discovered AGENTS.md path.
+/// Returns `Ok(None)` when no suitable file is found or
+/// `project_doc_max_bytes == 0`.
+pub fn discover_project_doc_path(config: &Config) -> std::io::Result<Option<std::path::PathBuf>> {
+    if config.project_doc_max_bytes == 0 {
+        return Ok(None);
     }
-}
 
-/// Attempt to locate and load the project documentation. Currently, the search
-/// starts from `Config::cwd`, but if we may want to consider other directories
-/// in the future, e.g., additional writable directories in the `SandboxPolicy`.
-///
-/// On success returns `Ok(Some(contents))`. If no documentation file is found
-/// the function returns `Ok(None)`. Unexpected I/O failures bubble up as
-/// `Err` so callers can decide how to handle them.
-async fn find_project_doc(config: &Config) -> std::io::Result<Option<String>> {
-    let max_bytes = config.project_doc_max_bytes;
-
-    // Attempt to load from the working directory first.
-    if let Some(doc) = load_first_candidate(&config.cwd, CANDIDATE_FILENAMES, max_bytes).await? {
-        return Ok(Some(doc));
-    }
+    discover_project_doc_path_from_dir(
+        &config.cwd,
+        CANDIDATE_FILENAMES,
+        config.project_doc_max_bytes,
+    )
+}
 
-    // Walk up towards the filesystem root, stopping once we encounter the Git
-    // repository root. The presence of **either** a `.git` *file* or
-    // *directory* counts.
-    let mut dir = config.cwd.clone();
+fn discover_project_doc_path_from_dir(
+    start_dir: &Path,
+    names: &[&str],
+    max_bytes: usize,
+) -> std::io::Result<Option<std::path::PathBuf>> {
+    use std::fs;
 
     // Canonicalize the path so that we do not end up in an infinite loop when
     // `cwd` contains `..` components.

Though I assume I wrote this comment, `Config.cwd` should not have `..` components, but maybe it could if you use `--cd`...?

@@ -23,65 +23,54 @@ const CANDIDATE_FILENAMES: &[&str] = &["AGENTS.md"];
 /// be concatenated with the following separator.
 const PROJECT_DOC_SEPARATOR: &str = "\n\n--- project-doc ---\n\n";
 
-/// Combines `Config::instructions` and `AGENTS.md` (if present) into a single
-/// string of instructions.
-pub(crate) async fn get_user_instructions(config: &Config) -> Option<String> {
-    match find_project_doc(config).await {
-        Ok(Some(project_doc)) => match &config.user_instructions {
-            Some(original_instructions) => Some(format!(
-                "{original_instructions}{PROJECT_DOC_SEPARATOR}{project_doc}"
-            )),
-            None => Some(project_doc),
-        },
-        Ok(None) => config.user_instructions.clone(),
-        Err(e) => {
-            error!("error trying to find project doc: {e:#}");
-            config.user_instructions.clone()
-        }
+/// Public helper that returns the discovered AGENTS.md path.
+/// Returns `Ok(None)` when no suitable file is found or
+/// `project_doc_max_bytes == 0`.
+pub fn discover_project_doc_path(config: &Config) -> std::io::Result<Option<std::path::PathBuf>> {
+    if config.project_doc_max_bytes == 0 {
+        return Ok(None);
     }
-}
 
-/// Attempt to locate and load the project documentation. Currently, the search
-/// starts from `Config::cwd`, but if we may want to consider other directories
-/// in the future, e.g., additional writable directories in the `SandboxPolicy`.
-///
-/// On success returns `Ok(Some(contents))`. If no documentation file is found
-/// the function returns `Ok(None)`. Unexpected I/O failures bubble up as
-/// `Err` so callers can decide how to handle them.
-async fn find_project_doc(config: &Config) -> std::io::Result<Option<String>> {
-    let max_bytes = config.project_doc_max_bytes;
-
-    // Attempt to load from the working directory first.
-    if let Some(doc) = load_first_candidate(&config.cwd, CANDIDATE_FILENAMES, max_bytes).await? {
-        return Ok(Some(doc));
-    }
+    discover_project_doc_path_from_dir(
+        &config.cwd,
+        CANDIDATE_FILENAMES,
+        config.project_doc_max_bytes,
+    )
+}
 
-    // Walk up towards the filesystem root, stopping once we encounter the Git
-    // repository root. The presence of **either** a `.git` *file* or
-    // *directory* counts.
-    let mut dir = config.cwd.clone();
+fn discover_project_doc_path_from_dir(

Please make this function async and use the tokio async equivalents of filesystem operations.

@@ -90,48 +79,94 @@ async fn find_project_doc(config: &Config) -> std::io::Result<Option<String>> {
     Ok(None)
 }
 
-/// Attempt to load the first candidate file found in `dir`. Returns the file
-/// contents (truncated if it exceeds `max_bytes`) when successful.
-async fn load_first_candidate(
+fn find_non_empty_candidate(
     dir: &Path,
     names: &[&str],
     max_bytes: usize,
-) -> std::io::Result<Option<String>> {
+) -> std::io::Result<Option<std::path::PathBuf>> {
+    use std::fs::File;
+    use std::io::Read;
+
     for name in names {
         let candidate = dir.join(name);
-
-        let file = match tokio::fs::File::open(&candidate).await {
+        let mut file = match File::open(&candidate) {
             Err(e) if e.kind() == std::io::ErrorKind::NotFound => continue,
             Err(e) => return Err(e),
             Ok(f) => f,
         };
 
-        let size = file.metadata().await?.len();
-
-        let reader = tokio::io::BufReader::new(file);
-        let mut data = Vec::with_capacity(std::cmp::min(size as usize, max_bytes));
-        let mut limited = reader.take(max_bytes as u64);
-        limited.read_to_end(&mut data).await?;
-
-        if size as usize > max_bytes {
-            tracing::warn!(
-                "Project doc `{}` exceeds {max_bytes} bytes - truncating.",
-                candidate.display(),
-            );
-        }
-
-        let contents = String::from_utf8_lossy(&data).to_string();
+        let size = file.metadata()?.len() as usize;

Use BufReader as before? From chat:

use tokio::io::{self, AsyncReadExt, BufReader};
use tokio::fs::File;

/// Example: read at most `n` bytes from the start of `example.txt` through an
/// async `BufReader` and print what was read.
///
/// Any error from opening or reading the file is propagated to the caller
/// via `?`.
#[tokio::main]
async fn main() -> io::Result<()> {
    // Open the file asynchronously and wrap it in a buffered reader.
    let file = File::open("example.txt").await?;
    let mut reader = BufReader::new(file);

    // Upper bound on the number of bytes to read; the buffer is pre-sized
    // (zero-filled) to exactly that length.
    let n = 10;
    let mut buf = vec![0u8; n];
    // A single `read` call; it reports how many bytes it placed in `buf`.
    let bytes_read = reader.read(&mut buf).await?;
    // Drop the unused zero-filled tail so `buf` holds only the bytes read.
    buf.truncate(bytes_read);

    println!("Read {} bytes: {:?}", bytes_read, buf);
    Ok(())
}
@@ -90,48 +79,94 @@ async fn find_project_doc(config: &Config) -> std::io::Result<Option<String>> {
     Ok(None)
 }
 
-/// Attempt to load the first candidate file found in `dir`. Returns the file
-/// contents (truncated if it exceeds `max_bytes`) when successful.
-async fn load_first_candidate(
+fn find_non_empty_candidate(
     dir: &Path,
     names: &[&str],
     max_bytes: usize,
-) -> std::io::Result<Option<String>> {
+) -> std::io::Result<Option<std::path::PathBuf>> {
+    use std::fs::File;
+    use std::io::Read;
+
     for name in names {
         let candidate = dir.join(name);
-
-        let file = match tokio::fs::File::open(&candidate).await {
+        let mut file = match File::open(&candidate) {
             Err(e) if e.kind() == std::io::ErrorKind::NotFound => continue,
             Err(e) => return Err(e),
             Ok(f) => f,
         };
 
-        let size = file.metadata().await?.len();
-
-        let reader = tokio::io::BufReader::new(file);
-        let mut data = Vec::with_capacity(std::cmp::min(size as usize, max_bytes));
-        let mut limited = reader.take(max_bytes as u64);
-        limited.read_to_end(&mut data).await?;
-
-        if size as usize > max_bytes {
-            tracing::warn!(
-                "Project doc `{}` exceeds {max_bytes} bytes - truncating.",
-                candidate.display(),
-            );
-        }
-
-        let contents = String::from_utf8_lossy(&data).to_string();
+        let size = file.metadata()?.len() as usize;
+        let to_read = std::cmp::min(size, max_bytes);
+        let mut data = vec![0u8; to_read];
+        let read_n = file.read(&mut data)?;

read() does not guarantee it fills the buffer:

https://doc.rust-lang.org/std/io/trait.Read.html#tymethod.read

read_exact() does that.

@@ -90,48 +79,94 @@ async fn find_project_doc(config: &Config) -> std::io::Result<Option<String>> {
     Ok(None)
 }
 
-/// Attempt to load the first candidate file found in `dir`. Returns the file
-/// contents (truncated if it exceeds `max_bytes`) when successful.
-async fn load_first_candidate(
+fn find_non_empty_candidate(
     dir: &Path,
     names: &[&str],
     max_bytes: usize,
-) -> std::io::Result<Option<String>> {
+) -> std::io::Result<Option<std::path::PathBuf>> {
+    use std::fs::File;
+    use std::io::Read;
+
     for name in names {
         let candidate = dir.join(name);
-
-        let file = match tokio::fs::File::open(&candidate).await {
+        let mut file = match File::open(&candidate) {
             Err(e) if e.kind() == std::io::ErrorKind::NotFound => continue,
             Err(e) => return Err(e),
             Ok(f) => f,
         };
 
-        let size = file.metadata().await?.len();
-
-        let reader = tokio::io::BufReader::new(file);
-        let mut data = Vec::with_capacity(std::cmp::min(size as usize, max_bytes));
-        let mut limited = reader.take(max_bytes as u64);
-        limited.read_to_end(&mut data).await?;
-
-        if size as usize > max_bytes {
-            tracing::warn!(
-                "Project doc `{}` exceeds {max_bytes} bytes - truncating.",
-                candidate.display(),
-            );
-        }
-
-        let contents = String::from_utf8_lossy(&data).to_string();
+        let size = file.metadata()?.len() as usize;
+        let to_read = std::cmp::min(size, max_bytes);
+        let mut data = vec![0u8; to_read];
+        let read_n = file.read(&mut data)?;
+        let contents = String::from_utf8_lossy(&data[..read_n]).to_string();
         if contents.trim().is_empty() {
-            // Empty file – treat as not found.
             continue;
         }
 
-        return Ok(Some(contents));
+        return Ok(Some(candidate));
     }
 
     Ok(None)
 }
 
+/// Combines `Config::instructions` and `AGENTS.md` (if present) into a single
+/// string of instructions.
+pub(crate) async fn get_user_instructions(config: &Config) -> Option<String> {
+    match find_project_doc(config).await {
+        Ok(Some(project_doc)) => match &config.user_instructions {
+            Some(original_instructions) => Some(format!(
+                "{original_instructions}{PROJECT_DOC_SEPARATOR}{project_doc}"
+            )),
+            None => Some(project_doc),
+        },
+        Ok(None) => config.user_instructions.clone(),
+        Err(e) => {
+            error!("error trying to find project doc: {e:#}");
+            config.user_instructions.clone()
+        }
+    }
+}
+
+/// Attempt to locate and load the project documentation. Currently, the search
+/// starts from `Config::cwd`, but if we may want to consider other directories
+/// in the future, e.g., additional writable directories in the `SandboxPolicy`.
+///
+/// On success returns `Ok(Some(contents))`. If no documentation file is found
+/// the function returns `Ok(None)`. Unexpected I/O failures bubble up as
+/// `Err` so callers can decide how to handle them.
+async fn find_project_doc(config: &Config) -> std::io::Result<Option<String>> {
+    use tokio::io::BufReader;
+
+    let Some(path) = discover_project_doc_path(config)? else {
+        return Ok(None);
+    };
+
+    let max_bytes = config.project_doc_max_bytes;

Feels like we should consolidate this with logic in find_non_empty_candidate()?

codex-rs/tui/src/history_cell.rs

@@ -176,8 +176,19 @@ impl HistoryCell {
                 ]),
             ];
 
+            // Show which AGENTS.md is being used (or 'none' if unavailable).
+            let agents_value = if config.project_doc_max_bytes > 0 {
+                match codex_core::discover_project_doc_path(config) {

Oh, is this why the function is sync instead of async?

@@ -176,8 +176,19 @@ impl HistoryCell {
                 ]),
             ];
 
+            // Show which AGENTS.md is being used (or 'none' if unavailable).
+            let agents_value = if config.project_doc_max_bytes > 0 {
+                match codex_core::discover_project_doc_path(config) {

@nornagon-openai @easong-openai have you formed any opinions about introducing more async into the TUI?

@@ -176,8 +176,19 @@ impl HistoryCell {
                 ]),
             ];
 
+            // Show which AGENTS.md is being used (or 'none' if unavailable).
+            let agents_value = if config.project_doc_max_bytes > 0 {
+                match codex_core::discover_project_doc_path(config) {

@dylan-hurd-oai is also looking into trying to get some Git info at startup and potentially on every conversation turn, so I think we need to figure out a way to leverage async better here. (Note git_info.rs is already doing some of this for use with rollouts.)

@@ -176,8 +176,19 @@ impl HistoryCell {
                 ]),
             ];
 
+            // Show which AGENTS.md is being used (or 'none' if unavailable).
+            let agents_value = if config.project_doc_max_bytes > 0 {
+                match codex_core::discover_project_doc_path(config) {

Can/should this be sent as part of the SessionConfiguredEvent?