//! Project-level documentation discovery. //! //! Project-level documentation is primarily stored in files named `AGENTS.md`. //! Additional fallback filenames can be configured via `project_doc_fallback_filenames`. //! We include the concatenation of all files found along the path from the //! repository root to the current working directory as follows: //! //! 1. Determine the Git repository root by walking upwards from the current //! working directory until a `.git` directory or file is found. If no Git //! root is found, only the current working directory is considered. //! 2. Collect every `AGENTS.md` found from the repository root down to the //! current working directory (inclusive) and concatenate their contents in //! that order. //! 3. We do **not** walk past the Git root. use crate::config::Config; use crate::exec::SandboxType; use crate::exec_env::create_env; use crate::protocol::SandboxPolicy; use crate::sandboxing::CommandSpec; use crate::sandboxing::SandboxManager; use crate::sandboxing::SandboxTransformError; use crate::sandboxing::execute_env; use codex_utils_string::take_bytes_at_char_boundary; use dunce::canonicalize as normalize_path; use std::io; use std::path::Path; use std::path::PathBuf; use tokio::io::AsyncReadExt; use tracing::error; /// Default filename scanned for project-level docs. pub const DEFAULT_PROJECT_DOC_FILENAME: &str = "AGENTS.md"; /// Preferred local override for project-level docs. pub const LOCAL_PROJECT_DOC_FILENAME: &str = "AGENTS.override.md"; /// When both `Config::instructions` and the project doc are present, they will /// be concatenated with the following separator. const PROJECT_DOC_SEPARATOR: &str = "\n\n--- project-doc ---\n\n"; const INTERPOLATION_TIMEOUT_MS: u64 = 5_000; /// Combines `Config::instructions` and `AGENTS.md` (if present) into a single /// string of instructions. pub(crate) async fn get_user_instructions(config: &Config) -> Option { match read_project_docs(config).await { Ok(Some(project_doc)) => match &config.user_instructions { Some(original_instructions) => Some(format!( "{original_instructions}{PROJECT_DOC_SEPARATOR}{project_doc}" )), None => Some(project_doc), }, Ok(None) => config.user_instructions.clone(), Err(e) => { error!("error trying to find project doc: {e:#}"); config.user_instructions.clone() } } } /// Attempt to locate and load the project documentation. /// /// On success returns `Ok(Some(contents))` where `contents` is the /// concatenation of all discovered docs. If no documentation file is found the /// function returns `Ok(None)`. Unexpected I/O failures bubble up as `Err` so /// callers can decide how to handle them. pub async fn read_project_docs(config: &Config) -> std::io::Result> { let max_total = config.project_doc_max_bytes; if max_total == 0 { return Ok(None); } let paths = discover_project_doc_paths(config)?; if paths.is_empty() { return Ok(None); } let mut remaining: u64 = max_total as u64; let mut parts: Vec = Vec::new(); for p in paths { if remaining == 0 { break; } let file = match tokio::fs::File::open(&p).await { Ok(f) => f, Err(e) if e.kind() == std::io::ErrorKind::NotFound => continue, Err(e) => return Err(e), }; let size = file.metadata().await?.len(); let mut reader = tokio::io::BufReader::new(file).take(remaining); let mut data: Vec = Vec::new(); reader.read_to_end(&mut data).await?; if size > remaining { tracing::warn!( "Project doc `{}` exceeds remaining budget ({} bytes) - truncating.", p.display(), remaining, ); } let text = String::from_utf8_lossy(&data).to_string(); if !text.trim().is_empty() { let doc_dir = p .parent() .map(Path::to_path_buf) .unwrap_or_else(|| config.cwd.clone()); let mut interpolated = apply_interpolations(&text, &doc_dir, config).await; let budget = remaining.min(usize::MAX as u64) as usize; if interpolated.len() > budget { tracing::warn!( "Project doc `{}` interpolations exceed remaining budget ({} bytes) - truncating.", p.display(), budget ); if budget == 0 { interpolated.clear(); } else { interpolated = take_bytes_at_char_boundary(&interpolated, budget).to_string(); } } if !interpolated.trim().is_empty() { remaining = remaining.saturating_sub(interpolated.len() as u64); parts.push(interpolated); } } } if parts.is_empty() { Ok(None) } else { Ok(Some(parts.join("\n\n"))) } } /// Discover the list of AGENTS.md files using the same search rules as /// `read_project_docs`, but return the file paths instead of concatenated /// contents. The list is ordered from repository root to the current working /// directory (inclusive). Symlinks are allowed. When `project_doc_max_bytes` /// is zero, returns an empty list. pub fn discover_project_doc_paths(config: &Config) -> std::io::Result> { let mut dir = config.cwd.clone(); if let Ok(canon) = normalize_path(&dir) { dir = canon; } // Build chain from cwd upwards and detect git root. let mut chain: Vec = vec![dir.clone()]; let mut git_root: Option = None; let mut cursor = dir; while let Some(parent) = cursor.parent() { let git_marker = cursor.join(".git"); let git_exists = match std::fs::metadata(&git_marker) { Ok(_) => true, Err(e) if e.kind() == std::io::ErrorKind::NotFound => false, Err(e) => return Err(e), }; if git_exists { git_root = Some(cursor.clone()); break; } chain.push(parent.to_path_buf()); cursor = parent.to_path_buf(); } let search_dirs: Vec = if let Some(root) = git_root { let mut dirs: Vec = Vec::new(); let mut saw_root = false; for p in chain.iter().rev() { if !saw_root { if p == &root { saw_root = true; } else { continue; } } dirs.push(p.clone()); } dirs } else { vec![config.cwd.clone()] }; let mut found: Vec = Vec::new(); let candidate_filenames = candidate_filenames(config); for d in search_dirs { for name in &candidate_filenames { let candidate = d.join(name); match std::fs::symlink_metadata(&candidate) { Ok(md) => { let ft = md.file_type(); // Allow regular files and symlinks; opening will later fail for dangling links. if ft.is_file() || ft.is_symlink() { found.push(candidate); break; } } Err(e) if e.kind() == std::io::ErrorKind::NotFound => continue, Err(e) => return Err(e), } } } Ok(found) } async fn apply_interpolations(text: &str, doc_dir: &Path, config: &Config) -> String { if !text.contains("{!") { return text.to_string(); } let mut cursor = 0; let mut output = String::with_capacity(text.len()); while let Some(relative_start) = text[cursor..].find("{!") { let start = cursor + relative_start; output.push_str(&text[cursor..start]); let script_start = start + 2; let remaining = &text[script_start..]; match remaining.find('}') { Some(relative_end) => { let end = script_start + relative_end; let placeholder = &text[start..=end]; let script = text[script_start..end].trim(); if script.is_empty() { cursor = end + 1; continue; } match run_interpolation_script(script, doc_dir, config).await { Ok(replacement) => output.push_str(&replacement), Err(err) => { tracing::warn!( script, error = %err, "Failed to evaluate AGENTS.md interpolation" ); output.push_str(placeholder); } } cursor = end + 1; } None => { output.push_str(&text[start..]); cursor = text.len(); } } } if cursor < text.len() { output.push_str(&text[cursor..]); } output } async fn run_interpolation_script( script: &str, doc_dir: &Path, config: &Config, ) -> io::Result { let policy = SandboxPolicy::new_read_only_policy(); let env = create_env(&config.shell_environment_policy); let spec = CommandSpec { program: "bash".to_string(), args: vec!["-lc".to_string(), script.to_string()], cwd: doc_dir.to_path_buf(), env, timeout_ms: Some(INTERPOLATION_TIMEOUT_MS), with_escalated_permissions: None, justification: None, }; let manager = SandboxManager::new(); let initial = determine_sandbox_type(); let allow_fallback = !matches!(initial, SandboxType::None); let sandbox_exe = config.codex_linux_sandbox_exe.as_ref(); let mut attempts: Vec<(SandboxType, Option<&PathBuf>, bool)> = vec![(initial, sandbox_exe, allow_fallback)]; if allow_fallback { attempts.push((SandboxType::None, None, false)); } for (sandbox_type, exe, can_fallback) in attempts { let exec_env = match manager.transform(&spec, &policy, sandbox_type, doc_dir, exe) { Ok(env) => env, Err(SandboxTransformError::MissingLinuxSandboxExecutable) if can_fallback => { tracing::warn!( "codex-linux-sandbox executable missing; retrying AGENTS.md interpolation without sandbox" ); continue; } Err(err) if can_fallback => { tracing::warn!( error = %err, "Sandbox setup failed for AGENTS.md interpolation; retrying without sandbox" ); continue; } Err(err) => return Err(sandbox_error_to_io(err)), }; match execute_env(&exec_env, &policy, None).await { Ok(result) => { if result.exit_code == 0 { let stdout = result.stdout.text; return Ok(stdout.trim_end_matches(['\n', '\r']).to_string()); } if can_fallback && manager.denied(sandbox_type, &result) { tracing::warn!( exit_code = result.exit_code, "Sandbox denied AGENTS.md interpolation; retrying without sandbox" ); continue; } let aggregated = result.aggregated_output.text; let trimmed = aggregated.trim(); let mut message = format!("command `{script}` exited with {}", result.exit_code); if !trimmed.is_empty() { message.push_str(": "); message.push_str(trimmed); } return Err(io::Error::other(message)); } Err(err) if can_fallback => { tracing::warn!( error = %err, "Sandbox execution failed for AGENTS.md interpolation; retrying without sandbox" ); continue; } Err(err) => { return Err(io::Error::other(err.to_string())); } } } Err(io::Error::other(format!( "command `{script}` failed to execute" ))) } #[cfg(target_os = "macos")] fn determine_sandbox_type() -> SandboxType { SandboxType::MacosSeatbelt } #[cfg(target_os = "linux")] fn determine_sandbox_type() -> SandboxType { SandboxType::LinuxSeccomp } #[cfg(not(any(target_os = "macos", target_os = "linux")))] fn determine_sandbox_type() -> SandboxType { SandboxType::None } fn sandbox_error_to_io(err: SandboxTransformError) -> io::Error { io::Error::other(err.to_string()) } fn candidate_filenames<'a>(config: &'a Config) -> Vec<&'a str> { let mut names: Vec<&'a str> = Vec::with_capacity(2 + config.project_doc_fallback_filenames.len()); names.push(LOCAL_PROJECT_DOC_FILENAME); names.push(DEFAULT_PROJECT_DOC_FILENAME); for candidate in &config.project_doc_fallback_filenames { let candidate = candidate.as_str(); if candidate.is_empty() { continue; } if !names.contains(&candidate) { names.push(candidate); } } names } #[cfg(test)] mod tests { use super::*; use crate::config::ConfigOverrides; use crate::config::ConfigToml; use std::fs; use tempfile::TempDir; /// Helper that returns a `Config` pointing at `root` and using `limit` as /// the maximum number of bytes to embed from AGENTS.md. The caller can /// optionally specify a custom `instructions` string – when `None` the /// value is cleared to mimic a scenario where no system instructions have /// been configured. fn make_config(root: &TempDir, limit: usize, instructions: Option<&str>) -> Config { let codex_home = TempDir::new().unwrap(); let mut config = Config::load_from_base_config_with_overrides( ConfigToml::default(), ConfigOverrides::default(), codex_home.path().to_path_buf(), ) .expect("defaults for test should always succeed"); config.cwd = root.path().to_path_buf(); config.project_doc_max_bytes = limit; config.user_instructions = instructions.map(ToOwned::to_owned); config } fn make_config_with_fallback( root: &TempDir, limit: usize, instructions: Option<&str>, fallbacks: &[&str], ) -> Config { let mut config = make_config(root, limit, instructions); config.project_doc_fallback_filenames = fallbacks .iter() .map(std::string::ToString::to_string) .collect(); config } /// AGENTS.md missing – should yield `None`. #[tokio::test] async fn no_doc_file_returns_none() { let tmp = tempfile::tempdir().expect("tempdir"); let res = get_user_instructions(&make_config(&tmp, 4096, None)).await; assert!( res.is_none(), "Expected None when AGENTS.md is absent and no system instructions provided" ); assert!(res.is_none(), "Expected None when AGENTS.md is absent"); } /// Small file within the byte-limit is returned unmodified. #[tokio::test] async fn doc_smaller_than_limit_is_returned() { let tmp = tempfile::tempdir().expect("tempdir"); fs::write(tmp.path().join("AGENTS.md"), "hello world").unwrap(); let res = get_user_instructions(&make_config(&tmp, 4096, None)) .await .expect("doc expected"); assert_eq!( res, "hello world", "The document should be returned verbatim when it is smaller than the limit and there are no existing instructions" ); } /// Oversize file is truncated to `project_doc_max_bytes`. #[tokio::test] async fn doc_larger_than_limit_is_truncated() { const LIMIT: usize = 1024; let tmp = tempfile::tempdir().expect("tempdir"); let huge = "A".repeat(LIMIT * 2); // 2 KiB fs::write(tmp.path().join("AGENTS.md"), &huge).unwrap(); let res = get_user_instructions(&make_config(&tmp, LIMIT, None)) .await .expect("doc expected"); assert_eq!(res.len(), LIMIT, "doc should be truncated to LIMIT bytes"); assert_eq!(res, huge[..LIMIT]); } /// When `cwd` is nested inside a repo, the search should locate AGENTS.md /// placed at the repository root (identified by `.git`). #[tokio::test] async fn finds_doc_in_repo_root() { let repo = tempfile::tempdir().expect("tempdir"); // Simulate a git repository. Note .git can be a file or a directory. std::fs::write( repo.path().join(".git"), "gitdir: /path/to/actual/git/dir\n", ) .unwrap(); // Put the doc at the repo root. fs::write(repo.path().join("AGENTS.md"), "root level doc").unwrap(); // Now create a nested working directory: repo/workspace/crate_a let nested = repo.path().join("workspace/crate_a"); std::fs::create_dir_all(&nested).unwrap(); // Build config pointing at the nested dir. let mut cfg = make_config(&repo, 4096, None); cfg.cwd = nested; let res = get_user_instructions(&cfg).await.expect("doc expected"); assert_eq!(res, "root level doc"); } /// Explicitly setting the byte-limit to zero disables project docs. #[tokio::test] async fn zero_byte_limit_disables_docs() { let tmp = tempfile::tempdir().expect("tempdir"); fs::write(tmp.path().join("AGENTS.md"), "something").unwrap(); let res = get_user_instructions(&make_config(&tmp, 0, None)).await; assert!( res.is_none(), "With limit 0 the function should return None" ); } /// When both system instructions *and* a project doc are present the two /// should be concatenated with the separator. #[tokio::test] async fn merges_existing_instructions_with_project_doc() { let tmp = tempfile::tempdir().expect("tempdir"); fs::write(tmp.path().join("AGENTS.md"), "proj doc").unwrap(); const INSTRUCTIONS: &str = "base instructions"; let res = get_user_instructions(&make_config(&tmp, 4096, Some(INSTRUCTIONS))) .await .expect("should produce a combined instruction string"); let expected = format!("{INSTRUCTIONS}{PROJECT_DOC_SEPARATOR}{}", "proj doc"); assert_eq!(res, expected); } /// If there are existing system instructions but the project doc is /// missing we expect the original instructions to be returned unchanged. #[tokio::test] async fn keeps_existing_instructions_when_doc_missing() { let tmp = tempfile::tempdir().expect("tempdir"); const INSTRUCTIONS: &str = "some instructions"; let res = get_user_instructions(&make_config(&tmp, 4096, Some(INSTRUCTIONS))).await; assert_eq!(res, Some(INSTRUCTIONS.to_string())); } /// When both the repository root and the working directory contain /// AGENTS.md files, their contents are concatenated from root to cwd. #[tokio::test] async fn concatenates_root_and_cwd_docs() { let repo = tempfile::tempdir().expect("tempdir"); // Simulate a git repository. std::fs::write( repo.path().join(".git"), "gitdir: /path/to/actual/git/dir\n", ) .unwrap(); // Repo root doc. fs::write(repo.path().join("AGENTS.md"), "root doc").unwrap(); // Nested working directory with its own doc. let nested = repo.path().join("workspace/crate_a"); std::fs::create_dir_all(&nested).unwrap(); fs::write(nested.join("AGENTS.md"), "crate doc").unwrap(); let mut cfg = make_config(&repo, 4096, None); cfg.cwd = nested; let res = get_user_instructions(&cfg).await.expect("doc expected"); assert_eq!(res, "root doc\n\ncrate doc"); } /// AGENTS.override.md is preferred over AGENTS.md when both are present. #[tokio::test] async fn agents_local_md_preferred() { let tmp = tempfile::tempdir().expect("tempdir"); fs::write(tmp.path().join(DEFAULT_PROJECT_DOC_FILENAME), "versioned").unwrap(); fs::write(tmp.path().join(LOCAL_PROJECT_DOC_FILENAME), "local").unwrap(); let cfg = make_config(&tmp, 4096, None); let res = get_user_instructions(&cfg) .await .expect("local doc expected"); assert_eq!(res, "local"); let discovery = discover_project_doc_paths(&cfg).expect("discover paths"); assert_eq!(discovery.len(), 1); assert_eq!( discovery[0].file_name().unwrap().to_string_lossy(), LOCAL_PROJECT_DOC_FILENAME ); } /// When AGENTS.md is absent but a configured fallback exists, the fallback is used. #[tokio::test] async fn uses_configured_fallback_when_agents_missing() { let tmp = tempfile::tempdir().expect("tempdir"); fs::write(tmp.path().join("EXAMPLE.md"), "example instructions").unwrap(); let cfg = make_config_with_fallback(&tmp, 4096, None, &["EXAMPLE.md"]); let res = get_user_instructions(&cfg) .await .expect("fallback doc expected"); assert_eq!(res, "example instructions"); } /// AGENTS.md remains preferred when both AGENTS.md and fallbacks are present. #[tokio::test] async fn agents_md_preferred_over_fallbacks() { let tmp = tempfile::tempdir().expect("tempdir"); fs::write(tmp.path().join("AGENTS.md"), "primary").unwrap(); fs::write(tmp.path().join("EXAMPLE.md"), "secondary").unwrap(); let cfg = make_config_with_fallback(&tmp, 4096, None, &["EXAMPLE.md", ".example.md"]); let res = get_user_instructions(&cfg) .await .expect("AGENTS.md should win"); assert_eq!(res, "primary"); let discovery = discover_project_doc_paths(&cfg).expect("discover paths"); assert_eq!(discovery.len(), 1); assert!( discovery[0] .file_name() .unwrap() .to_string_lossy() .eq(DEFAULT_PROJECT_DOC_FILENAME) ); } #[tokio::test] async fn interpolates_commands_in_project_doc() { let tmp = tempfile::tempdir().expect("tempdir"); fs::write(tmp.path().join("data.txt"), "alpha\n").unwrap(); fs::write(tmp.path().join("AGENTS.md"), "value: {!cat data.txt}").unwrap(); let res = get_user_instructions(&make_config(&tmp, 4096, None)) .await .expect("doc expected"); assert_eq!(res, "value: alpha"); } #[tokio::test] async fn leaves_placeholder_when_command_fails() { let tmp = tempfile::tempdir().expect("tempdir"); fs::write(tmp.path().join("AGENTS.md"), "value: {!exit 42}").unwrap(); let res = get_user_instructions(&make_config(&tmp, 4096, None)) .await .expect("doc expected"); assert_eq!(res, "value: {!exit 42}"); } #[tokio::test] async fn interpolation_uses_doc_directory() { let repo = tempfile::tempdir().expect("tempdir"); let nested = repo.path().join("nested"); std::fs::create_dir_all(&nested).unwrap(); fs::write(nested.join("info.txt"), "nested\n").unwrap(); fs::write(nested.join("AGENTS.md"), "{!cat info.txt}").unwrap(); let mut cfg = make_config(&repo, 4096, None); cfg.cwd = nested.clone(); let res = get_user_instructions(&cfg).await.expect("doc expected"); assert_eq!(res, "nested"); } }