# PR #1730: agents.md path shown at tui launch - URL: https://github.com/openai/codex/pull/1730 - Author: pap-openai - Created: 2025-07-29 21:28:05 UTC - Updated: 2025-08-06 22:53:44 UTC - Changes: +278/-14, Files changed: 9, Commits: 14 ## Description show agents.md on login, decouple function to find and load agents.md, add unit tests for agents.md discovery Preview: with an agents.md image without image ## Full Diff ```diff diff --git a/codex-rs/common/src/config_summary.rs b/codex-rs/common/src/config_summary.rs index 39d524731f..616030a85a 100644 --- a/codex-rs/common/src/config_summary.rs +++ b/codex-rs/common/src/config_summary.rs @@ -1,4 +1,5 @@ use codex_core::WireApi; +use codex_core::agents_doc_path_string; use codex_core::config::Config; use crate::sandbox_summary::summarize_sandbox_policy; @@ -7,6 +8,10 @@ use crate::sandbox_summary::summarize_sandbox_policy; pub fn create_config_summary_entries(config: &Config) -> Vec<(&'static str, String)> { let mut entries = vec![ ("workdir", config.cwd.display().to_string()), + ( + "agents.md", + agents_doc_path_string(config).unwrap_or_else(|| "none".to_string()), + ), ("model", config.model.clone()), ("provider", config.model_provider_id.clone()), ("approval", config.approval_policy.to_string()), diff --git a/codex-rs/core/src/codex.rs b/codex-rs/core/src/codex.rs index 98d13b4cd6..64f0cb2bab 100644 --- a/codex-rs/core/src/codex.rs +++ b/codex-rs/core/src/codex.rs @@ -857,6 +857,7 @@ async fn submission_loop( msg: EventMsg::SessionConfigured(SessionConfiguredEvent { session_id, model, + agents_doc_path: crate::project_doc::agents_doc_path_string(&config), history_log_id, history_entry_count, }), diff --git a/codex-rs/core/src/lib.rs b/codex-rs/core/src/lib.rs index c728bd3125..037476be0c 100644 --- a/codex-rs/core/src/lib.rs +++ b/codex-rs/core/src/lib.rs @@ -39,6 +39,8 @@ mod openai_model_info; mod openai_tools; pub mod plan_tool; mod project_doc; +pub use project_doc::agents_doc_path_string; +pub use 
project_doc::discover_project_doc_path; pub mod protocol; mod rollout; pub(crate) mod safety; diff --git a/codex-rs/core/src/project_doc.rs b/codex-rs/core/src/project_doc.rs index 9f46159d1d..56807da1ce 100644 --- a/codex-rs/core/src/project_doc.rs +++ b/codex-rs/core/src/project_doc.rs @@ -12,7 +12,10 @@ //! exists, the search stops – we do **not** walk past the Git root. use crate::config::Config; +use std::fs; +use std::io::Read as _; use std::path::Path; +use std::path::PathBuf; use tokio::io::AsyncReadExt; use tracing::error; @@ -49,6 +52,10 @@ pub(crate) async fn get_user_instructions(config: &Config) -> Option { /// the function returns `Ok(None)`. Unexpected I/O failures bubble up as /// `Err` so callers can decide how to handle them. async fn find_project_doc(config: &Config) -> std::io::Result> { + if config.project_doc_max_bytes == 0 { + return Ok(None); + } + let max_bytes = config.project_doc_max_bytes; // Attempt to load from the working directory first. @@ -90,6 +97,124 @@ async fn find_project_doc(config: &Config) -> std::io::Result> { Ok(None) } +/// Public helper that returns the discovered AGENTS.md path. +/// Returns `Ok(None)` when no suitable file is found or `project_doc_max_bytes == 0`. +pub fn discover_project_doc_path(config: &Config) -> std::io::Result> { + if config.project_doc_max_bytes == 0 { + return Ok(None); + } + + discover_project_doc_path_from_dir(&config.cwd, CANDIDATE_FILENAMES) +} + +fn discover_project_doc_path_from_dir( + start_dir: &Path, + names: &[&str], +) -> std::io::Result> { + // Canonicalize the path so that we do not end up in an infinite loop when + // `cwd` contains `..` components. + let mut dir = start_dir.to_path_buf(); + if let Ok(canon) = dir.canonicalize() { + dir = canon; + } + + // Attempt in the working directory first. + if let Some(path) = first_nonempty_candidate_in_dir(&dir, names) { + return Ok(Some(path)); + } + + // Walk up towards the filesystem root, stopping once we encounter the Git root. 
+ while let Some(parent) = dir.parent() { + let git_marker = dir.join(".git"); + let git_exists = match fs::metadata(&git_marker) { + Ok(_) => true, + Err(e) if e.kind() == std::io::ErrorKind::NotFound => false, + Err(e) => return Err(e), + }; + + if git_exists { + if let Some(path) = first_nonempty_candidate_in_dir(&dir, names) { + return Ok(Some(path)); + } + break; // do not walk past the Git root + } + + dir = parent.to_path_buf(); + } + + Ok(None) +} + +/// Return a human‑readable description of the AGENTS.md path(s) that will be +/// loaded for this session, or `None` if neither global nor project docs are +/// present. +/// +/// This mirrors the discovery logic used by `get_user_instructions()`: +/// - If `~/.codex/AGENTS.md` (global) is non‑empty, it is included. +/// - If a project AGENTS.md is found (respecting the byte limit and git root +/// stop), it is included. +/// - When the project_doc_max_bytes is set to 0, project docs are disabled. +pub fn agents_doc_path_string(config: &Config) -> Option { + let mut parts: Vec = Vec::new(); + + // Global AGENTS.md in CODEX_HOME. + if config.user_instructions.is_some() { + let global = config.codex_home.join("AGENTS.md"); + parts.push(global.display().to_string()); + } + + // Project AGENTS.md, unless disabled via byte‑limit == 0. + if config.project_doc_max_bytes > 0 { + if let Ok(Some(p)) = discover_project_doc_path(config) { + parts.push(p.display().to_string()); + } + } + + if parts.is_empty() { + None + } else { + Some(parts.join(" + ")) + } +} + +fn first_nonempty_candidate_in_dir(dir: &Path, names: &[&str]) -> Option { + for name in names { + let candidate = dir.join(name); + // Fast path: must exist and be a file. + let md = match std::fs::metadata(&candidate) { + Ok(m) if m.is_file() => m, + _ => continue, + }; + + // If the file is zero bytes, skip without reading. + if md.len() == 0 { + continue; + } + + // Read up to a modest limit to determine if the contents are effectively empty after trimming. 
+ // Use the same limit as `project_doc_max_bytes` would permit by default. + const MAX_PEEK_BYTES: usize = 8 * 1024; + let mut file = match std::fs::File::open(&candidate) { + Ok(f) => f, + Err(_) => continue, + }; + let mut buf = Vec::with_capacity(std::cmp::min(md.len() as usize, MAX_PEEK_BYTES)); + if std::io::Read::by_ref(&mut file) + .take(MAX_PEEK_BYTES as u64) + .read_to_end(&mut buf) + .is_err() + { + continue; + } + let s = String::from_utf8_lossy(&buf); + if s.trim().is_empty() { + continue; + } + return Some(candidate); + } + None +} + /// Attempt to load the first candidate file found in `dir`. Returns the file /// contents (truncated if it exceeds `max_bytes`) when successful. async fn load_first_candidate( @@ -237,6 +362,68 @@ mod tests { assert_eq!(res, "root level doc"); } + /// Test if AGENTS.md located in the current working directory is preferred over the repo root. + #[tokio::test] + async fn prefers_cwd_doc_over_repo_root() { + let repo = tempfile::tempdir().expect("tempdir"); + + // Simulate a git repository at repo root. + std::fs::write(repo.path().join(".git"), "gitdir: /dev/null\n").unwrap(); + + // Create AGENTS.md at repo root and in a nested cwd. + fs::write(repo.path().join("AGENTS.md"), "root level doc").unwrap(); + let nested = repo.path().join("workspace/crate_b"); + std::fs::create_dir_all(&nested).unwrap(); + fs::write(nested.join("AGENTS.md"), "nested cwd doc").unwrap(); + + // Build config pointing at the nested dir. + let mut cfg = make_config(&repo, 4096, None); + cfg.cwd = nested.clone(); + + // Path discovery should prefer the nested cwd doc. 
+ let discovered = super::discover_project_doc_path(&cfg) + .expect("discovery should succeed") + .expect("path should be found"); + let discovered_canon = fs::canonicalize(&discovered).expect("canonicalize discovered"); + let expected_canon = + fs::canonicalize(nested.join("AGENTS.md")).expect("canonicalize expected"); + assert_eq!(discovered_canon, expected_canon); + + // get_user_instructions should load the nested document contents. + let res = get_user_instructions(&cfg).await.expect("doc expected"); + assert_eq!(res, "nested cwd doc"); + } + + /// Test if AGENTS.md at the repo root is used when none exists in cwd. + #[tokio::test] + async fn falls_back_to_repo_root_when_cwd_missing_doc() { + let repo = tempfile::tempdir().expect("tempdir"); + + // Simulate a git repository at repo root. + std::fs::write(repo.path().join(".git"), "gitdir: /dev/null\n").unwrap(); + + // Create AGENTS.md only at repo root. + fs::write(repo.path().join("AGENTS.md"), "root level doc").unwrap(); + + // Nested cwd without its own AGENTS.md. + let nested = repo.path().join("nested/dir"); + std::fs::create_dir_all(&nested).unwrap(); + + let mut cfg = make_config(&repo, 4096, None); + cfg.cwd = nested; + + let discovered = super::discover_project_doc_path(&cfg) + .expect("discovery should succeed") + .expect("path should be found"); + let discovered_canon = fs::canonicalize(&discovered).expect("canonicalize discovered"); + let expected_canon = + fs::canonicalize(repo.path().join("AGENTS.md")).expect("canonicalize expected"); + assert_eq!(discovered_canon, expected_canon); + + let res = get_user_instructions(&cfg).await.expect("doc expected"); + assert_eq!(res, "root level doc"); + } + /// Explicitly setting the byte-limit to zero disables project docs. 
#[tokio::test] async fn zero_byte_limit_disables_docs() { diff --git a/codex-rs/core/src/protocol.rs b/codex-rs/core/src/protocol.rs index 052806dd97..b861bda7e8 100644 --- a/codex-rs/core/src/protocol.rs +++ b/codex-rs/core/src/protocol.rs @@ -648,6 +648,10 @@ pub struct SessionConfiguredEvent { /// Tell the client what model is being queried. pub model: String, + /// The path(s) to AGENTS.md that were loaded for this session, if any. + #[serde(skip_serializing_if = "Option::is_none")] + pub agents_doc_path: Option, + /// Identifier of the history log file (inode on Unix, 0 otherwise). pub history_log_id: u64, @@ -713,6 +717,7 @@ mod tests { msg: EventMsg::SessionConfigured(SessionConfiguredEvent { session_id, model: "codex-mini-latest".to_string(), + agents_doc_path: None, history_log_id: 0, history_entry_count: 0, }), diff --git a/codex-rs/exec/src/event_processor_with_human_output.rs b/codex-rs/exec/src/event_processor_with_human_output.rs index 6b03ed7882..2e4a206357 100644 --- a/codex-rs/exec/src/event_processor_with_human_output.rs +++ b/codex-rs/exec/src/event_processor_with_human_output.rs @@ -494,6 +494,7 @@ impl EventProcessor for EventProcessorWithHumanOutput { model, history_log_id: _, history_entry_count: _, + .. 
} = session_configured_event; ts_println!( diff --git a/codex-rs/mcp-server/src/mcp_protocol.rs b/codex-rs/mcp-server/src/mcp_protocol.rs index 2f8858a37b..8104d4ac3e 100644 --- a/codex-rs/mcp-server/src/mcp_protocol.rs +++ b/codex-rs/mcp-server/src/mcp_protocol.rs @@ -906,6 +906,7 @@ mod tests { msg: EventMsg::SessionConfigured(codex_core::protocol::SessionConfiguredEvent { session_id: uuid!("67e55044-10b1-426f-9247-bb680e5fe0c8"), model: "codex-mini-latest".into(), + agents_doc_path: None, history_log_id: 42, history_entry_count: 3, }), diff --git a/codex-rs/mcp-server/src/outgoing_message.rs b/codex-rs/mcp-server/src/outgoing_message.rs index e7b0b9b63c..74079d3169 100644 --- a/codex-rs/mcp-server/src/outgoing_message.rs +++ b/codex-rs/mcp-server/src/outgoing_message.rs @@ -242,6 +242,7 @@ mod tests { msg: EventMsg::SessionConfigured(SessionConfiguredEvent { session_id: Uuid::new_v4(), model: "gpt-4o".to_string(), + agents_doc_path: None, history_log_id: 1, history_entry_count: 1000, }), @@ -282,6 +283,7 @@ mod tests { let session_configured_event = SessionConfiguredEvent { session_id: Uuid::new_v4(), model: "gpt-4o".to_string(), + agents_doc_path: None, history_log_id: 1, history_entry_count: 1000, }; diff --git a/codex-rs/tui/src/history_cell.rs b/codex-rs/tui/src/history_cell.rs index c577ce17a0..366046eca5 100644 --- a/codex-rs/tui/src/history_cell.rs +++ b/codex-rs/tui/src/history_cell.rs @@ -166,12 +166,8 @@ impl HistoryCell { event: SessionConfiguredEvent, is_first_event: bool, ) -> Self { - let SessionConfiguredEvent { - model, - session_id: _, - history_log_id: _, - history_entry_count: _, - } = event; + let model = event.model.clone(); + let agents_doc_path = event.agents_doc_path.clone(); if is_first_event { let cwd_str = match relativize_to_home(&config.cwd) { Some(rel) if !rel.as_os_str().is_empty() => format!("~/{}", rel.display()), @@ -179,7 +175,7 @@ impl HistoryCell { None => config.cwd.display().to_string(), }; - let lines: Vec> = vec![ + let 
mut lines: Vec> = vec![ Line::from(vec![ Span::raw(">_ ").dim(), Span::styled( @@ -189,14 +185,78 @@ impl HistoryCell { Span::raw(format!(" {cwd_str}")).dim(), ]), Line::from("".dim()), - Line::from(" Try one of the following commands to get started:".dim()), - Line::from("".dim()), - Line::from(format!(" 1. /init - {}", SlashCommand::Init.description()).dim()), - Line::from(format!(" 2. /status - {}", SlashCommand::Status.description()).dim()), - Line::from(format!(" 3. /compact - {}", SlashCommand::Compact.description()).dim()), - Line::from(format!(" 4. /new - {}", SlashCommand::New.description()).dim()), - Line::from("".dim()), ]; + + // If AGENTS.md is configured (either user-level or project-level), + // show a concise summary and omit the /init hint, but still show the other onboarding commands. + let show_init = agents_doc_path.is_none(); + + if let Some(paths_str) = agents_doc_path { + let global_path = config.codex_home.join("AGENTS.md"); + let parts: Vec = paths_str + .split(" + ") + .map(|s| s.trim().to_string()) + .filter(|s| !s.is_empty()) + .collect(); + + let mut user_path: Option = None; + let mut project_path: Option = None; + for p in parts { + if p == global_path.display().to_string() { + user_path = Some(p); + } else { + project_path = Some(p); + } + } + + let summary_line = match (user_path, project_path) { + (Some(u), Some(pr)) => { + format!(" Using user instructions ({u}) and project instructions ({pr})") + } + (Some(u), None) => format!("Using user instructions ({u})"), + (None, Some(pr)) => format!(" Using project instructions ({pr})"), + (None, None) => String::new(), + }; + + if !summary_line.is_empty() { + lines.push(Line::from(summary_line.dim())); + lines.push(Line::from("".dim())); + } + } + + // Onboarding hints, with index based on whether /init is shown + lines.push(Line::from( + " Try one of the following commands to get started:".dim(), + )); + lines.push(Line::from("".dim())); + + let mut cmd_index = 1; + if show_init { + 
lines.push(Line::from( + format!(" {cmd_index}. /init - {}", SlashCommand::Init.description()).dim(), + )); + cmd_index += 1; + } + lines.push(Line::from( + format!( + " {cmd_index}. /status - {}", + SlashCommand::Status.description() + ) + .dim(), + )); + cmd_index += 1; + lines.push(Line::from( + format!( + " {cmd_index}. /compact - {}", + SlashCommand::Compact.description() + ) + .dim(), + )); + cmd_index += 1; + lines.push(Line::from( + format!(" {cmd_index}. /new - {}", SlashCommand::New.description()).dim(), + )); + lines.push(Line::from("".dim())); HistoryCell::WelcomeMessage { view: TextBlock::new(lines), } ``` ## Review Comments ### codex-rs/core/src/project_doc.rs - Created: 2025-07-30 16:10:19 UTC | Link: https://github.com/openai/codex/pull/1730#discussion_r2243193435 ```diff @@ -90,48 +79,94 @@ async fn find_project_doc(config: &Config) -> std::io::Result> { Ok(None) } -/// Attempt to load the first candidate file found in `dir`. Returns the file -/// contents (truncated if it exceeds `max_bytes`) when successful. 
-async fn load_first_candidate( +fn find_non_empty_candidate( dir: &Path, names: &[&str], max_bytes: usize, -) -> std::io::Result> { +) -> std::io::Result> { + use std::fs::File; + use std::io::Read; + for name in names { let candidate = dir.join(name); - - let file = match tokio::fs::File::open(&candidate).await { + let mut file = match File::open(&candidate) { Err(e) if e.kind() == std::io::ErrorKind::NotFound => continue, Err(e) => return Err(e), Ok(f) => f, }; - let size = file.metadata().await?.len(); - - let reader = tokio::io::BufReader::new(file); - let mut data = Vec::with_capacity(std::cmp::min(size as usize, max_bytes)); - let mut limited = reader.take(max_bytes as u64); - limited.read_to_end(&mut data).await?; - - if size as usize > max_bytes { - tracing::warn!( - "Project doc `{}` exceeds {max_bytes} bytes - truncating.", - candidate.display(), - ); - } - - let contents = String::from_utf8_lossy(&data).to_string(); + let size = file.metadata()?.len() as usize; + let to_read = std::cmp::min(size, max_bytes); + let mut data = vec![0u8; to_read]; + let read_n = file.read(&mut data)?; + let contents = String::from_utf8_lossy(&data[..read_n]).to_string(); if contents.trim().is_empty() { - // Empty file – treat as not found. continue; } - return Ok(Some(contents)); + return Ok(Some(candidate)); } Ok(None) } +/// Combines `Config::instructions` and `AGENTS.md` (if present) into a single +/// string of instructions. +pub(crate) async fn get_user_instructions(config: &Config) -> Option { + match find_project_doc(config).await { + Ok(Some(project_doc)) => match &config.user_instructions { + Some(original_instructions) => Some(format!( + "{original_instructions}{PROJECT_DOC_SEPARATOR}{project_doc}" + )), + None => Some(project_doc), + }, + Ok(None) => config.user_instructions.clone(), + Err(e) => { + error!("error trying to find project doc: {e:#}"); + config.user_instructions.clone() + } + } +} + +/// Attempt to locate and load the project documentation. 
Currently, the search +/// starts from `Config::cwd`, but if we may want to consider other directories +/// in the future, e.g., additional writable directories in the `SandboxPolicy`. +/// +/// On success returns `Ok(Some(contents))`. If no documentation file is found +/// the function returns `Ok(None)`. Unexpected I/O failures bubble up as +/// `Err` so callers can decide how to handle them. +async fn find_project_doc(config: &Config) -> std::io::Result> { + use tokio::io::BufReader; ``` > Please move to the top of the file. - Created: 2025-07-30 16:11:21 UTC | Link: https://github.com/openai/codex/pull/1730#discussion_r2243195675 ```diff @@ -90,48 +79,94 @@ async fn find_project_doc(config: &Config) -> std::io::Result> { Ok(None) } -/// Attempt to load the first candidate file found in `dir`. Returns the file -/// contents (truncated if it exceeds `max_bytes`) when successful. -async fn load_first_candidate( +fn find_non_empty_candidate( dir: &Path, names: &[&str], max_bytes: usize, -) -> std::io::Result> { +) -> std::io::Result> { + use std::fs::File; ``` > Move these to the top. - Created: 2025-07-30 16:15:39 UTC | Link: https://github.com/openai/codex/pull/1730#discussion_r2243205576 ```diff @@ -23,65 +23,54 @@ const CANDIDATE_FILENAMES: &[&str] = &["AGENTS.md"]; /// be concatenated with the following separator. const PROJECT_DOC_SEPARATOR: &str = "\n\n--- project-doc ---\n\n"; -/// Combines `Config::instructions` and `AGENTS.md` (if present) into a single -/// string of instructions. 
-pub(crate) async fn get_user_instructions(config: &Config) -> Option { - match find_project_doc(config).await { - Ok(Some(project_doc)) => match &config.user_instructions { - Some(original_instructions) => Some(format!( - "{original_instructions}{PROJECT_DOC_SEPARATOR}{project_doc}" - )), - None => Some(project_doc), - }, - Ok(None) => config.user_instructions.clone(), - Err(e) => { - error!("error trying to find project doc: {e:#}"); - config.user_instructions.clone() - } +/// Public helper that returns the discovered AGENTS.md path. +/// Returns `Ok(None)` when no suitable file is found or +/// `project_doc_max_bytes == 0`. +pub fn discover_project_doc_path(config: &Config) -> std::io::Result> { + if config.project_doc_max_bytes == 0 { + return Ok(None); } -} -/// Attempt to locate and load the project documentation. Currently, the search -/// starts from `Config::cwd`, but if we may want to consider other directories -/// in the future, e.g., additional writable directories in the `SandboxPolicy`. -/// -/// On success returns `Ok(Some(contents))`. If no documentation file is found -/// the function returns `Ok(None)`. Unexpected I/O failures bubble up as -/// `Err` so callers can decide how to handle them. -async fn find_project_doc(config: &Config) -> std::io::Result> { - let max_bytes = config.project_doc_max_bytes; - - // Attempt to load from the working directory first. - if let Some(doc) = load_first_candidate(&config.cwd, CANDIDATE_FILENAMES, max_bytes).await? { - return Ok(Some(doc)); - } + discover_project_doc_path_from_dir( + &config.cwd, + CANDIDATE_FILENAMES, + config.project_doc_max_bytes, + ) ``` > Arguably, using an expression is more idiomatic (applies to lines 30-38, but GitHub wouldn't let me select it). Also, I would take this opportunity to list the more common case first. 
> > ```suggestion > if config.project_doc_max_bytes > 0 { > discover_project_doc_path_from_dir( > &config.cwd, > CANDIDATE_FILENAMES, > config.project_doc_max_bytes, > ) > } else { > Ok(None) > } > ``` - Created: 2025-07-30 16:16:27 UTC | Link: https://github.com/openai/codex/pull/1730#discussion_r2243207508 ```diff @@ -23,65 +23,54 @@ const CANDIDATE_FILENAMES: &[&str] = &["AGENTS.md"]; /// be concatenated with the following separator. const PROJECT_DOC_SEPARATOR: &str = "\n\n--- project-doc ---\n\n"; -/// Combines `Config::instructions` and `AGENTS.md` (if present) into a single -/// string of instructions. -pub(crate) async fn get_user_instructions(config: &Config) -> Option { - match find_project_doc(config).await { - Ok(Some(project_doc)) => match &config.user_instructions { - Some(original_instructions) => Some(format!( - "{original_instructions}{PROJECT_DOC_SEPARATOR}{project_doc}" - )), - None => Some(project_doc), - }, - Ok(None) => config.user_instructions.clone(), - Err(e) => { - error!("error trying to find project doc: {e:#}"); - config.user_instructions.clone() - } +/// Public helper that returns the discovered AGENTS.md path. +/// Returns `Ok(None)` when no suitable file is found or +/// `project_doc_max_bytes == 0`. +pub fn discover_project_doc_path(config: &Config) -> std::io::Result> { + if config.project_doc_max_bytes == 0 { + return Ok(None); } -} -/// Attempt to locate and load the project documentation. Currently, the search -/// starts from `Config::cwd`, but if we may want to consider other directories -/// in the future, e.g., additional writable directories in the `SandboxPolicy`. -/// -/// On success returns `Ok(Some(contents))`. If no documentation file is found -/// the function returns `Ok(None)`. Unexpected I/O failures bubble up as -/// `Err` so callers can decide how to handle them. 
-async fn find_project_doc(config: &Config) -> std::io::Result> { - let max_bytes = config.project_doc_max_bytes; - - // Attempt to load from the working directory first. - if let Some(doc) = load_first_candidate(&config.cwd, CANDIDATE_FILENAMES, max_bytes).await? { - return Ok(Some(doc)); - } + discover_project_doc_path_from_dir( + &config.cwd, + CANDIDATE_FILENAMES, + config.project_doc_max_bytes, + ) +} - // Walk up towards the filesystem root, stopping once we encounter the Git - // repository root. The presence of **either** a `.git` *file* or - // *directory* counts. - let mut dir = config.cwd.clone(); +fn discover_project_doc_path_from_dir( + start_dir: &Path, + names: &[&str], + max_bytes: usize, +) -> std::io::Result> { + use std::fs; ``` > to top - Created: 2025-07-30 16:17:51 UTC | Link: https://github.com/openai/codex/pull/1730#discussion_r2243211102 ```diff @@ -23,65 +23,54 @@ const CANDIDATE_FILENAMES: &[&str] = &["AGENTS.md"]; /// be concatenated with the following separator. const PROJECT_DOC_SEPARATOR: &str = "\n\n--- project-doc ---\n\n"; -/// Combines `Config::instructions` and `AGENTS.md` (if present) into a single -/// string of instructions. -pub(crate) async fn get_user_instructions(config: &Config) -> Option { - match find_project_doc(config).await { - Ok(Some(project_doc)) => match &config.user_instructions { - Some(original_instructions) => Some(format!( - "{original_instructions}{PROJECT_DOC_SEPARATOR}{project_doc}" - )), - None => Some(project_doc), - }, - Ok(None) => config.user_instructions.clone(), - Err(e) => { - error!("error trying to find project doc: {e:#}"); - config.user_instructions.clone() - } +/// Public helper that returns the discovered AGENTS.md path. +/// Returns `Ok(None)` when no suitable file is found or +/// `project_doc_max_bytes == 0`. 
+pub fn discover_project_doc_path(config: &Config) -> std::io::Result> { + if config.project_doc_max_bytes == 0 { + return Ok(None); } -} -/// Attempt to locate and load the project documentation. Currently, the search -/// starts from `Config::cwd`, but if we may want to consider other directories -/// in the future, e.g., additional writable directories in the `SandboxPolicy`. -/// -/// On success returns `Ok(Some(contents))`. If no documentation file is found -/// the function returns `Ok(None)`. Unexpected I/O failures bubble up as -/// `Err` so callers can decide how to handle them. -async fn find_project_doc(config: &Config) -> std::io::Result> { - let max_bytes = config.project_doc_max_bytes; - - // Attempt to load from the working directory first. - if let Some(doc) = load_first_candidate(&config.cwd, CANDIDATE_FILENAMES, max_bytes).await? { - return Ok(Some(doc)); - } + discover_project_doc_path_from_dir( + &config.cwd, + CANDIDATE_FILENAMES, + config.project_doc_max_bytes, + ) +} - // Walk up towards the filesystem root, stopping once we encounter the Git - // repository root. The presence of **either** a `.git` *file* or - // *directory* counts. - let mut dir = config.cwd.clone(); +fn discover_project_doc_path_from_dir( + start_dir: &Path, + names: &[&str], + max_bytes: usize, +) -> std::io::Result> { + use std::fs; // Canonicalize the path so that we do not end up in an infinite loop when // `cwd` contains `..` components. ``` > ```suggestion > // `start_dir` contains `..` components. > ``` - Created: 2025-07-30 16:18:41 UTC | Link: https://github.com/openai/codex/pull/1730#discussion_r2243214469 ```diff @@ -23,65 +23,54 @@ const CANDIDATE_FILENAMES: &[&str] = &["AGENTS.md"]; /// be concatenated with the following separator. const PROJECT_DOC_SEPARATOR: &str = "\n\n--- project-doc ---\n\n"; -/// Combines `Config::instructions` and `AGENTS.md` (if present) into a single -/// string of instructions. 
-pub(crate) async fn get_user_instructions(config: &Config) -> Option { - match find_project_doc(config).await { - Ok(Some(project_doc)) => match &config.user_instructions { - Some(original_instructions) => Some(format!( - "{original_instructions}{PROJECT_DOC_SEPARATOR}{project_doc}" - )), - None => Some(project_doc), - }, - Ok(None) => config.user_instructions.clone(), - Err(e) => { - error!("error trying to find project doc: {e:#}"); - config.user_instructions.clone() - } +/// Public helper that returns the discovered AGENTS.md path. +/// Returns `Ok(None)` when no suitable file is found or +/// `project_doc_max_bytes == 0`. +pub fn discover_project_doc_path(config: &Config) -> std::io::Result> { + if config.project_doc_max_bytes == 0 { + return Ok(None); } -} -/// Attempt to locate and load the project documentation. Currently, the search -/// starts from `Config::cwd`, but if we may want to consider other directories -/// in the future, e.g., additional writable directories in the `SandboxPolicy`. -/// -/// On success returns `Ok(Some(contents))`. If no documentation file is found -/// the function returns `Ok(None)`. Unexpected I/O failures bubble up as -/// `Err` so callers can decide how to handle them. -async fn find_project_doc(config: &Config) -> std::io::Result> { - let max_bytes = config.project_doc_max_bytes; - - // Attempt to load from the working directory first. - if let Some(doc) = load_first_candidate(&config.cwd, CANDIDATE_FILENAMES, max_bytes).await? { - return Ok(Some(doc)); - } + discover_project_doc_path_from_dir( + &config.cwd, + CANDIDATE_FILENAMES, + config.project_doc_max_bytes, + ) +} - // Walk up towards the filesystem root, stopping once we encounter the Git - // repository root. The presence of **either** a `.git` *file* or - // *directory* counts. 
- let mut dir = config.cwd.clone(); +fn discover_project_doc_path_from_dir( + start_dir: &Path, + names: &[&str], + max_bytes: usize, +) -> std::io::Result> { + use std::fs; // Canonicalize the path so that we do not end up in an infinite loop when // `cwd` contains `..` components. ``` > Though I assume I wrote this comment, `Config.cwd` should not have `..` components, but maybe it could if you use `--cd`...? - Created: 2025-07-30 16:23:57 UTC | Link: https://github.com/openai/codex/pull/1730#discussion_r2243239863 ```diff @@ -23,65 +23,54 @@ const CANDIDATE_FILENAMES: &[&str] = &["AGENTS.md"]; /// be concatenated with the following separator. const PROJECT_DOC_SEPARATOR: &str = "\n\n--- project-doc ---\n\n"; -/// Combines `Config::instructions` and `AGENTS.md` (if present) into a single -/// string of instructions. -pub(crate) async fn get_user_instructions(config: &Config) -> Option { - match find_project_doc(config).await { - Ok(Some(project_doc)) => match &config.user_instructions { - Some(original_instructions) => Some(format!( - "{original_instructions}{PROJECT_DOC_SEPARATOR}{project_doc}" - )), - None => Some(project_doc), - }, - Ok(None) => config.user_instructions.clone(), - Err(e) => { - error!("error trying to find project doc: {e:#}"); - config.user_instructions.clone() - } +/// Public helper that returns the discovered AGENTS.md path. +/// Returns `Ok(None)` when no suitable file is found or +/// `project_doc_max_bytes == 0`. +pub fn discover_project_doc_path(config: &Config) -> std::io::Result> { + if config.project_doc_max_bytes == 0 { + return Ok(None); } -} -/// Attempt to locate and load the project documentation. Currently, the search -/// starts from `Config::cwd`, but if we may want to consider other directories -/// in the future, e.g., additional writable directories in the `SandboxPolicy`. -/// -/// On success returns `Ok(Some(contents))`. If no documentation file is found -/// the function returns `Ok(None)`. 
Unexpected I/O failures bubble up as -/// `Err` so callers can decide how to handle them. -async fn find_project_doc(config: &Config) -> std::io::Result> { - let max_bytes = config.project_doc_max_bytes; - - // Attempt to load from the working directory first. - if let Some(doc) = load_first_candidate(&config.cwd, CANDIDATE_FILENAMES, max_bytes).await? { - return Ok(Some(doc)); - } + discover_project_doc_path_from_dir( + &config.cwd, + CANDIDATE_FILENAMES, + config.project_doc_max_bytes, + ) +} - // Walk up towards the filesystem root, stopping once we encounter the Git - // repository root. The presence of **either** a `.git` *file* or - // *directory* counts. - let mut dir = config.cwd.clone(); +fn discover_project_doc_path_from_dir( ``` > Please make this function `async` and use the `tokio` async equivalents of filesystem operations. - Created: 2025-07-30 16:26:41 UTC | Link: https://github.com/openai/codex/pull/1730#discussion_r2243249154 ```diff @@ -90,48 +79,94 @@ async fn find_project_doc(config: &Config) -> std::io::Result> { Ok(None) } -/// Attempt to load the first candidate file found in `dir`. Returns the file -/// contents (truncated if it exceeds `max_bytes`) when successful. 
-async fn load_first_candidate( +fn find_non_empty_candidate( dir: &Path, names: &[&str], max_bytes: usize, -) -> std::io::Result> { +) -> std::io::Result> { + use std::fs::File; + use std::io::Read; + for name in names { let candidate = dir.join(name); - - let file = match tokio::fs::File::open(&candidate).await { + let mut file = match File::open(&candidate) { Err(e) if e.kind() == std::io::ErrorKind::NotFound => continue, Err(e) => return Err(e), Ok(f) => f, }; - let size = file.metadata().await?.len(); - - let reader = tokio::io::BufReader::new(file); - let mut data = Vec::with_capacity(std::cmp::min(size as usize, max_bytes)); - let mut limited = reader.take(max_bytes as u64); - limited.read_to_end(&mut data).await?; - - if size as usize > max_bytes { - tracing::warn!( - "Project doc `{}` exceeds {max_bytes} bytes - truncating.", - candidate.display(), - ); - } - - let contents = String::from_utf8_lossy(&data).to_string(); + let size = file.metadata()?.len() as usize; ``` > Use `BufReader` as before? From chat: > > ``` > use tokio::io::{self, AsyncReadExt, BufReader}; > use tokio::fs::File; > > #[tokio::main] > async fn main() -> io::Result<()> { > let file = File::open("example.txt").await?; > let mut reader = BufReader::new(file); > > let n = 10; > let mut buf = vec![0u8; n]; > let bytes_read = reader.read(&mut buf).await?; > buf.truncate(bytes_read); > > println!("Read {} bytes: {:?}", bytes_read, buf); > Ok(()) > } > ``` - Created: 2025-07-30 16:31:01 UTC | Link: https://github.com/openai/codex/pull/1730#discussion_r2243262881 ```diff @@ -90,48 +79,94 @@ async fn find_project_doc(config: &Config) -> std::io::Result> { Ok(None) } -/// Attempt to load the first candidate file found in `dir`. Returns the file -/// contents (truncated if it exceeds `max_bytes`) when successful. 
-async fn load_first_candidate( +fn find_non_empty_candidate( dir: &Path, names: &[&str], max_bytes: usize, -) -> std::io::Result> { +) -> std::io::Result> { + use std::fs::File; + use std::io::Read; + for name in names { let candidate = dir.join(name); - - let file = match tokio::fs::File::open(&candidate).await { + let mut file = match File::open(&candidate) { Err(e) if e.kind() == std::io::ErrorKind::NotFound => continue, Err(e) => return Err(e), Ok(f) => f, }; - let size = file.metadata().await?.len(); - - let reader = tokio::io::BufReader::new(file); - let mut data = Vec::with_capacity(std::cmp::min(size as usize, max_bytes)); - let mut limited = reader.take(max_bytes as u64); - limited.read_to_end(&mut data).await?; - - if size as usize > max_bytes { - tracing::warn!( - "Project doc `{}` exceeds {max_bytes} bytes - truncating.", - candidate.display(), - ); - } - - let contents = String::from_utf8_lossy(&data).to_string(); + let size = file.metadata()?.len() as usize; + let to_read = std::cmp::min(size, max_bytes); + let mut data = vec![0u8; to_read]; + let read_n = file.read(&mut data)?; ``` > `read()` does not guarantee it fills the buffer: > > https://doc.rust-lang.org/std/io/trait.Read.html#tymethod.read > > `read_exact()` does that. - Created: 2025-07-30 16:32:05 UTC | Link: https://github.com/openai/codex/pull/1730#discussion_r2243265577 ```diff @@ -90,48 +79,94 @@ async fn find_project_doc(config: &Config) -> std::io::Result> { Ok(None) } -/// Attempt to load the first candidate file found in `dir`. Returns the file -/// contents (truncated if it exceeds `max_bytes`) when successful. 
-async fn load_first_candidate( +fn find_non_empty_candidate( dir: &Path, names: &[&str], max_bytes: usize, -) -> std::io::Result> { +) -> std::io::Result> { + use std::fs::File; + use std::io::Read; + for name in names { let candidate = dir.join(name); - - let file = match tokio::fs::File::open(&candidate).await { + let mut file = match File::open(&candidate) { Err(e) if e.kind() == std::io::ErrorKind::NotFound => continue, Err(e) => return Err(e), Ok(f) => f, }; - let size = file.metadata().await?.len(); - - let reader = tokio::io::BufReader::new(file); - let mut data = Vec::with_capacity(std::cmp::min(size as usize, max_bytes)); - let mut limited = reader.take(max_bytes as u64); - limited.read_to_end(&mut data).await?; - - if size as usize > max_bytes { - tracing::warn!( - "Project doc `{}` exceeds {max_bytes} bytes - truncating.", - candidate.display(), - ); - } - - let contents = String::from_utf8_lossy(&data).to_string(); + let size = file.metadata()?.len() as usize; + let to_read = std::cmp::min(size, max_bytes); + let mut data = vec![0u8; to_read]; + let read_n = file.read(&mut data)?; + let contents = String::from_utf8_lossy(&data[..read_n]).to_string(); if contents.trim().is_empty() { - // Empty file – treat as not found. continue; } - return Ok(Some(contents)); + return Ok(Some(candidate)); } Ok(None) } +/// Combines `Config::instructions` and `AGENTS.md` (if present) into a single +/// string of instructions. +pub(crate) async fn get_user_instructions(config: &Config) -> Option { + match find_project_doc(config).await { + Ok(Some(project_doc)) => match &config.user_instructions { + Some(original_instructions) => Some(format!( + "{original_instructions}{PROJECT_DOC_SEPARATOR}{project_doc}" + )), + None => Some(project_doc), + }, + Ok(None) => config.user_instructions.clone(), + Err(e) => { + error!("error trying to find project doc: {e:#}"); + config.user_instructions.clone() + } + } +} + +/// Attempt to locate and load the project documentation. 
Currently, the search +/// starts from `Config::cwd`, but if we may want to consider other directories +/// in the future, e.g., additional writable directories in the `SandboxPolicy`. +/// +/// On success returns `Ok(Some(contents))`. If no documentation file is found +/// the function returns `Ok(None)`. Unexpected I/O failures bubble up as +/// `Err` so callers can decide how to handle them. +async fn find_project_doc(config: &Config) -> std::io::Result> { + use tokio::io::BufReader; + + let Some(path) = discover_project_doc_path(config)? else { + return Ok(None); + }; + + let max_bytes = config.project_doc_max_bytes; ``` > Feels like we should consolidate this with logic in `find_non_empty_candidate()`? ### codex-rs/tui/src/history_cell.rs - Created: 2025-07-30 16:34:22 UTC | Link: https://github.com/openai/codex/pull/1730#discussion_r2243271535 ```diff @@ -176,8 +176,19 @@ impl HistoryCell { ]), ]; + // Show which AGENTS.md is being used (or 'none' if unavailable). + let agents_value = if config.project_doc_max_bytes > 0 { + match codex_core::discover_project_doc_path(config) { ``` > Oh, is this why the function is sync instead of async? - Created: 2025-07-30 16:36:12 UTC | Link: https://github.com/openai/codex/pull/1730#discussion_r2243276241 ```diff @@ -176,8 +176,19 @@ impl HistoryCell { ]), ]; + // Show which AGENTS.md is being used (or 'none' if unavailable). + let agents_value = if config.project_doc_max_bytes > 0 { + match codex_core::discover_project_doc_path(config) { ``` > @nornagon-openai @easong-openai have you formed any opinions about introducing more `async` into the TUI? - Created: 2025-08-06 19:35:55 UTC | Link: https://github.com/openai/codex/pull/1730#discussion_r2258095262 ```diff @@ -176,8 +176,19 @@ impl HistoryCell { ]), ]; + // Show which AGENTS.md is being used (or 'none' if unavailable). 
+ let agents_value = if config.project_doc_max_bytes > 0 { + match codex_core::discover_project_doc_path(config) { ``` > @dylan-hurd-oai is also looking into trying to get some Git info at startup and potentially on every conversation turn, so I think we need to figure out a way to leverage async better here. (Note `git_info.rs` is already doing some of this for use with rollouts.) > - Created: 2025-08-06 19:36:47 UTC | Link: https://github.com/openai/codex/pull/1730#discussion_r2258096688 ```diff @@ -176,8 +176,19 @@ impl HistoryCell { ]), ]; + // Show which AGENTS.md is being used (or 'none' if unavailable). + let agents_value = if config.project_doc_max_bytes > 0 { + match codex_core::discover_project_doc_path(config) { ``` > Should this be sent as part of the `SessionConfiguredEvent`?