diff --git a/codex-rs/core/src/lib.rs b/codex-rs/core/src/lib.rs index 6cb6aaa629..0111f28e0a 100644 --- a/codex-rs/core/src/lib.rs +++ b/codex-rs/core/src/lib.rs @@ -35,6 +35,7 @@ pub mod openai_api_key; mod openai_model_info; mod openai_tools; mod project_doc; +pub use project_doc::discover_project_doc_path; pub mod protocol; mod rollout; mod safety; diff --git a/codex-rs/core/src/project_doc.rs b/codex-rs/core/src/project_doc.rs index 9f46159d1d..18f80ef95c 100644 --- a/codex-rs/core/src/project_doc.rs +++ b/codex-rs/core/src/project_doc.rs @@ -23,6 +23,89 @@ const CANDIDATE_FILENAMES: &[&str] = &["AGENTS.md"]; /// be concatenated with the following separator. const PROJECT_DOC_SEPARATOR: &str = "\n\n--- project-doc ---\n\n"; +/// Public helper that returns the discovered AGENTS.md path. +/// Returns `Ok(None)` when no suitable file is found or +/// `project_doc_max_bytes == 0`. +pub fn discover_project_doc_path(config: &Config) -> std::io::Result> { + if config.project_doc_max_bytes == 0 { + return Ok(None); + } + + discover_project_doc_path_from_dir(&config.cwd, CANDIDATE_FILENAMES, config.project_doc_max_bytes) +} + +fn discover_project_doc_path_from_dir( + start_dir: &Path, + names: &[&str], + max_bytes: usize, +) -> std::io::Result> { + use std::fs; + + // Canonicalize the path so that we do not end up in an infinite loop when + // `cwd` contains `..` components. + let mut dir = start_dir.to_path_buf(); + if let Ok(canon) = dir.canonicalize() { + dir = canon; + } + + // Attempt in the working directory first. + if let Some(path) = find_non_empty_candidate(&dir, names, max_bytes)? { + return Ok(Some(path)); + } + + // Walk up towards the filesystem root, stopping once we encounter the Git root. + while let Some(parent) = dir.parent() { + let git_marker = dir.join(".git"); + let git_exists = match fs::metadata(&git_marker) { + Ok(_) => true, + Err(e) if e.kind() == std::io::ErrorKind::NotFound => false, + Err(e) => return Err(e), + }; + + if git_exists { + if let Some(path) = find_non_empty_candidate(&dir, names, max_bytes)? { + return Ok(Some(path)); + } + break; // do not walk past the Git root + } + + dir = parent.to_path_buf(); + } + + Ok(None) +} + +fn find_non_empty_candidate( + dir: &Path, + names: &[&str], + max_bytes: usize, +) -> std::io::Result> { + use std::fs::File; + use std::io::Read; + + for name in names { + let candidate = dir.join(name); + let mut file = match File::open(&candidate) { + Err(e) if e.kind() == std::io::ErrorKind::NotFound => continue, + Err(e) => return Err(e), + Ok(f) => f, + }; + + let size = file.metadata()?.len() as usize; + let to_read = std::cmp::min(size, max_bytes); + let mut data = vec![0u8; to_read]; + let read_n = file.read(&mut data)?; + let contents = String::from_utf8_lossy(&data[..read_n]).to_string(); + if contents.trim().is_empty() { + continue; + } + + return Ok(Some(candidate)); + } + + Ok(None) +} + /// Combines `Config::instructions` and `AGENTS.md` (if present) into a single /// string of instructions. pub(crate) async fn get_user_instructions(config: &Config) -> Option { @@ -49,88 +132,37 @@ pub(crate) async fn get_user_instructions(config: &Config) -> Option { /// the function returns `Ok(None)`. Unexpected I/O failures bubble up as /// `Err` so callers can decide how to handle them. async fn find_project_doc(config: &Config) -> std::io::Result> { + use tokio::io::BufReader; + + let Some(path) = discover_project_doc_path(config)? else { + return Ok(None); + }; + let max_bytes = config.project_doc_max_bytes; - // Attempt to load from the working directory first. - if let Some(doc) = load_first_candidate(&config.cwd, CANDIDATE_FILENAMES, max_bytes).await? { - return Ok(Some(doc)); + let file = tokio::fs::File::open(&path).await?; + let size = file.metadata().await?.len() as usize; + + let reader = BufReader::new(file); + let mut data = Vec::with_capacity(std::cmp::min(size, max_bytes)); + let mut limited = reader.take(max_bytes as u64); + limited.read_to_end(&mut data).await?; + + if size > max_bytes { + tracing::warn!( + "Project doc `{}` exceeds {max_bytes} bytes - truncating.", + path.display(), + ); } - // Walk up towards the filesystem root, stopping once we encounter the Git - // repository root. The presence of **either** a `.git` *file* or - // *directory* counts. - let mut dir = config.cwd.clone(); - - // Canonicalize the path so that we do not end up in an infinite loop when - // `cwd` contains `..` components. - if let Ok(canon) = dir.canonicalize() { - dir = canon; + let contents = String::from_utf8_lossy(&data).to_string(); + if contents.trim().is_empty() { + return Ok(None); } - while let Some(parent) = dir.parent() { - // `.git` can be a *file* (for worktrees or submodules) or a *dir*. - let git_marker = dir.join(".git"); - let git_exists = match tokio::fs::metadata(&git_marker).await { - Ok(_) => true, - Err(e) if e.kind() == std::io::ErrorKind::NotFound => false, - Err(e) => return Err(e), - }; - - if git_exists { - // We are at the repo root – attempt one final load. - if let Some(doc) = load_first_candidate(&dir, CANDIDATE_FILENAMES, max_bytes).await? { - return Ok(Some(doc)); - } - break; - } - - dir = parent.to_path_buf(); - } - - Ok(None) + Ok(Some(contents)) } -/// Attempt to load the first candidate file found in `dir`. Returns the file -/// contents (truncated if it exceeds `max_bytes`) when successful. -async fn load_first_candidate( - dir: &Path, - names: &[&str], - max_bytes: usize, -) -> std::io::Result> { - for name in names { - let candidate = dir.join(name); - - let file = match tokio::fs::File::open(&candidate).await { - Err(e) if e.kind() == std::io::ErrorKind::NotFound => continue, - Err(e) => return Err(e), - Ok(f) => f, - }; - - let size = file.metadata().await?.len(); - - let reader = tokio::io::BufReader::new(file); - let mut data = Vec::with_capacity(std::cmp::min(size as usize, max_bytes)); - let mut limited = reader.take(max_bytes as u64); - limited.read_to_end(&mut data).await?; - - if size as usize > max_bytes { - tracing::warn!( - "Project doc `{}` exceeds {max_bytes} bytes - truncating.", - candidate.display(), - ); - } - - let contents = String::from_utf8_lossy(&data).to_string(); - if contents.trim().is_empty() { - // Empty file – treat as not found. - continue; - } - - return Ok(Some(contents)); - } - - Ok(None) -} #[cfg(test)] mod tests { @@ -237,6 +269,66 @@ mod tests { assert_eq!(res, "root level doc"); } + /// Test if AGENTS.md located in the current working directory is preferred over the repo root. + #[tokio::test] + async fn prefers_cwd_doc_over_repo_root() { + let repo = tempfile::tempdir().expect("tempdir"); + + // Simulate a git repository at repo root. + std::fs::write(repo.path().join(".git"), "gitdir: /dev/null\n").unwrap(); + + // Create AGENTS.md at repo root and in a nested cwd. + fs::write(repo.path().join("AGENTS.md"), "root level doc").unwrap(); + let nested = repo.path().join("workspace/crate_b"); + std::fs::create_dir_all(&nested).unwrap(); + fs::write(nested.join("AGENTS.md"), "nested cwd doc").unwrap(); + + // Build config pointing at the nested dir. + let mut cfg = make_config(&repo, 4096, None); + cfg.cwd = nested.clone(); + + // Path discovery should prefer the nested cwd doc. + let discovered = super::discover_project_doc_path(&cfg) + .expect("discovery should succeed") + .expect("path should be found"); + let discovered_canon = fs::canonicalize(&discovered).expect("canonicalize discovered"); + let expected_canon = fs::canonicalize(nested.join("AGENTS.md")).expect("canonicalize expected"); + assert_eq!(discovered_canon, expected_canon); + + // get_user_instructions should load the nested document contents. + let res = get_user_instructions(&cfg).await.expect("doc expected"); + assert_eq!(res, "nested cwd doc"); + } + + /// Test if AGENTS.md at the repo root is used when none exists in cwd. + #[tokio::test] + async fn falls_back_to_repo_root_when_cwd_missing_doc() { + let repo = tempfile::tempdir().expect("tempdir"); + + // Simulate a git repository at repo root. + std::fs::write(repo.path().join(".git"), "gitdir: /dev/null\n").unwrap(); + + // Create AGENTS.md only at repo root. + fs::write(repo.path().join("AGENTS.md"), "root level doc").unwrap(); + + // Nested cwd without its own AGENTS.md. + let nested = repo.path().join("nested/dir"); + std::fs::create_dir_all(&nested).unwrap(); + + let mut cfg = make_config(&repo, 4096, None); + cfg.cwd = nested; + + let discovered = super::discover_project_doc_path(&cfg) + .expect("discovery should succeed") + .expect("path should be found"); + let discovered_canon = fs::canonicalize(&discovered).expect("canonicalize discovered"); + let expected_canon = fs::canonicalize(repo.path().join("AGENTS.md")).expect("canonicalize expected"); + assert_eq!(discovered_canon, expected_canon); + + let res = get_user_instructions(&cfg).await.expect("doc expected"); + assert_eq!(res, "root level doc"); + } + /// Explicitly setting the byte-limit to zero disables project docs. #[tokio::test] async fn zero_byte_limit_disables_docs() { diff --git a/codex-rs/tui/src/history_cell.rs b/codex-rs/tui/src/history_cell.rs index ab657163ad..4634d5baaf 100644 --- a/codex-rs/tui/src/history_cell.rs +++ b/codex-rs/tui/src/history_cell.rs @@ -176,8 +176,19 @@ impl HistoryCell { ]), ]; + // Show which AGENTS.md is being used (or 'none' if unavailable). + let agents_value = if config.project_doc_max_bytes > 0 { + match codex_core::discover_project_doc_path(config) { + Ok(Some(path)) => path.display().to_string(), + _ => "none".to_string(), + } + } else { + "none".to_string() + }; + let mut entries = vec![ ("workdir", config.cwd.display().to_string()), + ("agents.md", agents_value), ("model", config.model.clone()), ("provider", config.model_provider_id.clone()), ("approval", config.approval_policy.to_string()),