mirror of
https://github.com/openai/codex.git
synced 2026-04-29 02:41:12 +03:00
1204 lines
46 KiB
Markdown
1204 lines
46 KiB
Markdown
# PR #1730: agents.md path shown at tui launch
|
||
|
||
- URL: https://github.com/openai/codex/pull/1730
|
||
- Author: pap-openai
|
||
- Created: 2025-07-29 21:28:05 UTC
|
||
- Updated: 2025-08-06 22:53:44 UTC
|
||
- Changes: +278/-14, Files changed: 9, Commits: 14
|
||
|
||
## Description
|
||
|
||
Show the AGENTS.md path(s) in use on login (at TUI launch), decouple the logic that discovers the AGENTS.md path from the logic that loads its contents, and add unit tests for AGENTS.md discovery.
|
||
|
||
Preview:
|
||
|
||
With an AGENTS.md present:
|
||
|
||
<img width="723" height="361" alt="image" src="https://github.com/user-attachments/assets/bfbf00b2-b99d-46d6-83b9-46865aad34f2" />
|
||
|
||
Without an AGENTS.md:
|
||
|
||
<img width="728" height="364" alt="image" src="https://github.com/user-attachments/assets/5fba9bf9-43b3-444b-b561-d64757d02088" />
|
||
|
||
## Full Diff
|
||
|
||
```diff
|
||
diff --git a/codex-rs/common/src/config_summary.rs b/codex-rs/common/src/config_summary.rs
|
||
index 39d524731f..616030a85a 100644
|
||
--- a/codex-rs/common/src/config_summary.rs
|
||
+++ b/codex-rs/common/src/config_summary.rs
|
||
@@ -1,4 +1,5 @@
|
||
use codex_core::WireApi;
|
||
+use codex_core::agents_doc_path_string;
|
||
use codex_core::config::Config;
|
||
|
||
use crate::sandbox_summary::summarize_sandbox_policy;
|
||
@@ -7,6 +8,10 @@ use crate::sandbox_summary::summarize_sandbox_policy;
|
||
pub fn create_config_summary_entries(config: &Config) -> Vec<(&'static str, String)> {
|
||
let mut entries = vec![
|
||
("workdir", config.cwd.display().to_string()),
|
||
+ (
|
||
+ "agents.md",
|
||
+ agents_doc_path_string(config).unwrap_or_else(|| "none".to_string()),
|
||
+ ),
|
||
("model", config.model.clone()),
|
||
("provider", config.model_provider_id.clone()),
|
||
("approval", config.approval_policy.to_string()),
|
||
diff --git a/codex-rs/core/src/codex.rs b/codex-rs/core/src/codex.rs
|
||
index 98d13b4cd6..64f0cb2bab 100644
|
||
--- a/codex-rs/core/src/codex.rs
|
||
+++ b/codex-rs/core/src/codex.rs
|
||
@@ -857,6 +857,7 @@ async fn submission_loop(
|
||
msg: EventMsg::SessionConfigured(SessionConfiguredEvent {
|
||
session_id,
|
||
model,
|
||
+ agents_doc_path: crate::project_doc::agents_doc_path_string(&config),
|
||
history_log_id,
|
||
history_entry_count,
|
||
}),
|
||
diff --git a/codex-rs/core/src/lib.rs b/codex-rs/core/src/lib.rs
|
||
index c728bd3125..037476be0c 100644
|
||
--- a/codex-rs/core/src/lib.rs
|
||
+++ b/codex-rs/core/src/lib.rs
|
||
@@ -39,6 +39,8 @@ mod openai_model_info;
|
||
mod openai_tools;
|
||
pub mod plan_tool;
|
||
mod project_doc;
|
||
+pub use project_doc::agents_doc_path_string;
|
||
+pub use project_doc::discover_project_doc_path;
|
||
pub mod protocol;
|
||
mod rollout;
|
||
pub(crate) mod safety;
|
||
diff --git a/codex-rs/core/src/project_doc.rs b/codex-rs/core/src/project_doc.rs
|
||
index 9f46159d1d..56807da1ce 100644
|
||
--- a/codex-rs/core/src/project_doc.rs
|
||
+++ b/codex-rs/core/src/project_doc.rs
|
||
@@ -12,7 +12,10 @@
|
||
//! exists, the search stops – we do **not** walk past the Git root.
|
||
|
||
use crate::config::Config;
|
||
+use std::fs;
|
||
+use std::io::Read as _;
|
||
use std::path::Path;
|
||
+use std::path::PathBuf;
|
||
use tokio::io::AsyncReadExt;
|
||
use tracing::error;
|
||
|
||
@@ -49,6 +52,10 @@ pub(crate) async fn get_user_instructions(config: &Config) -> Option<String> {
|
||
/// the function returns `Ok(None)`. Unexpected I/O failures bubble up as
|
||
/// `Err` so callers can decide how to handle them.
|
||
async fn find_project_doc(config: &Config) -> std::io::Result<Option<String>> {
|
||
+ if config.project_doc_max_bytes == 0 {
|
||
+ return Ok(None);
|
||
+ }
|
||
+
|
||
let max_bytes = config.project_doc_max_bytes;
|
||
|
||
// Attempt to load from the working directory first.
|
||
@@ -90,6 +97,124 @@ async fn find_project_doc(config: &Config) -> std::io::Result<Option<String>> {
|
||
Ok(None)
|
||
}
|
||
|
||
+/// Public helper that returns the discovered AGENTS.md path.
|
||
+/// Returns `Ok(None)` when no suitable file is found or `project_doc_max_bytes == 0`.
|
||
+pub fn discover_project_doc_path(config: &Config) -> std::io::Result<Option<std::path::PathBuf>> {
|
||
+ if config.project_doc_max_bytes == 0 {
|
||
+ return Ok(None);
|
||
+ }
|
||
+
|
||
+ discover_project_doc_path_from_dir(&config.cwd, CANDIDATE_FILENAMES)
|
||
+}
|
||
+
|
||
+fn discover_project_doc_path_from_dir(
|
||
+ start_dir: &Path,
|
||
+ names: &[&str],
|
||
+) -> std::io::Result<Option<std::path::PathBuf>> {
|
||
+ // Canonicalize the path so that we do not end up in an infinite loop when
|
||
+ // `cwd` contains `..` components.
|
||
+ let mut dir = start_dir.to_path_buf();
|
||
+ if let Ok(canon) = dir.canonicalize() {
|
||
+ dir = canon;
|
||
+ }
|
||
+
|
||
+ // Attempt in the working directory first.
|
||
+ if let Some(path) = first_nonempty_candidate_in_dir(&dir, names) {
|
||
+ return Ok(Some(path));
|
||
+ }
|
||
+
|
||
+ // Walk up towards the filesystem root, stopping once we encounter the Git root.
|
||
+ while let Some(parent) = dir.parent() {
|
||
+ let git_marker = dir.join(".git");
|
||
+ let git_exists = match fs::metadata(&git_marker) {
|
||
+ Ok(_) => true,
|
||
+ Err(e) if e.kind() == std::io::ErrorKind::NotFound => false,
|
||
+ Err(e) => return Err(e),
|
||
+ };
|
||
+
|
||
+ if git_exists {
|
||
+ if let Some(path) = first_nonempty_candidate_in_dir(&dir, names) {
|
||
+ return Ok(Some(path));
|
||
+ }
|
||
+ break; // do not walk past the Git root
|
||
+ }
|
||
+
|
||
+ dir = parent.to_path_buf();
|
||
+ }
|
||
+
|
||
+ Ok(None)
|
||
+}
|
||
+
|
||
+/// Return a human‑readable description of the AGENTS.md path(s) that will be
|
||
+/// loaded for this session, or `None` if neither global nor project docs are
|
||
+/// present.
|
||
+///
|
||
+/// This mirrors the discovery logic used by `get_user_instructions()`:
|
||
+/// - If `~/.codex/AGENTS.md` (global) is non‑empty, it is included.
|
||
+/// - If a project AGENTS.md is found (respecting the byte limit and git root
|
||
+/// stop), it is included.
|
||
+/// - When the project_doc_max_bytes is set to 0, project docs are disabled.
|
||
+pub fn agents_doc_path_string(config: &Config) -> Option<String> {
|
||
+ let mut parts: Vec<String> = Vec::new();
|
||
+
|
||
+ // Global AGENTS.md in CODEX_HOME.
|
||
+ if config.user_instructions.is_some() {
|
||
+ let global = config.codex_home.join("AGENTS.md");
|
||
+ parts.push(global.display().to_string());
|
||
+ }
|
||
+
|
||
+ // Project AGENTS.md, unless disabled via byte‑limit == 0.
|
||
+ if config.project_doc_max_bytes > 0 {
|
||
+ if let Ok(Some(p)) = discover_project_doc_path(config) {
|
||
+ parts.push(p.display().to_string());
|
||
+ }
|
||
+ }
|
||
+
|
||
+ if parts.is_empty() {
|
||
+ None
|
||
+ } else {
|
||
+ Some(parts.join(" + "))
|
||
+ }
|
||
+}
|
||
+
|
||
+fn first_nonempty_candidate_in_dir(dir: &Path, names: &[&str]) -> Option<PathBuf> {
|
||
+ for name in names {
|
||
+ let candidate = dir.join(name);
|
||
+ // Fast path: must exist and be a file.
|
||
+ let md = match std::fs::metadata(&candidate) {
|
||
+ Ok(m) if m.is_file() => m,
|
||
+ _ => continue,
|
||
+ };
|
||
+
|
||
+ // If the file is zero bytes, skip without reading.
|
||
+ if md.len() == 0 {
|
||
+ continue;
|
||
+ }
|
||
+
|
||
+ // Read up to a modest limit to determine if the contents are effectively empty after trimming.
|
||
+ // Use the same limit as `project_doc_max_bytes` would permit by default.
|
||
+ const MAX_PEEK_BYTES: usize = 8 * 1024;
|
||
+ let mut file = match std::fs::File::open(&candidate) {
|
||
+ Ok(f) => f,
|
||
+ Err(_) => continue,
|
||
+ };
|
||
+ let mut buf = Vec::with_capacity(std::cmp::min(md.len() as usize, MAX_PEEK_BYTES));
|
||
+ if std::io::Read::by_ref(&mut file)
|
||
+ .take(MAX_PEEK_BYTES as u64)
|
||
+ .read_to_end(&mut buf)
|
||
+ .is_err()
|
||
+ {
|
||
+ continue;
|
||
+ }
|
||
+ let s = String::from_utf8_lossy(&buf);
|
||
+ if s.trim().is_empty() {
|
||
+ continue;
|
||
+ }
|
||
+ return Some(candidate);
|
||
+ }
|
||
+ None
|
||
+}
|
||
+
|
||
/// Attempt to load the first candidate file found in `dir`. Returns the file
|
||
/// contents (truncated if it exceeds `max_bytes`) when successful.
|
||
async fn load_first_candidate(
|
||
@@ -237,6 +362,68 @@ mod tests {
|
||
assert_eq!(res, "root level doc");
|
||
}
|
||
|
||
+ /// Test if AGENTS.md located in the current working directory is preferred over the repo root.
|
||
+ #[tokio::test]
|
||
+ async fn prefers_cwd_doc_over_repo_root() {
|
||
+ let repo = tempfile::tempdir().expect("tempdir");
|
||
+
|
||
+ // Simulate a git repository at repo root.
|
||
+ std::fs::write(repo.path().join(".git"), "gitdir: /dev/null\n").unwrap();
|
||
+
|
||
+ // Create AGENTS.md at repo root and in a nested cwd.
|
||
+ fs::write(repo.path().join("AGENTS.md"), "root level doc").unwrap();
|
||
+ let nested = repo.path().join("workspace/crate_b");
|
||
+ std::fs::create_dir_all(&nested).unwrap();
|
||
+ fs::write(nested.join("AGENTS.md"), "nested cwd doc").unwrap();
|
||
+
|
||
+ // Build config pointing at the nested dir.
|
||
+ let mut cfg = make_config(&repo, 4096, None);
|
||
+ cfg.cwd = nested.clone();
|
||
+
|
||
+ // Path discovery should prefer the nested cwd doc.
|
||
+ let discovered = super::discover_project_doc_path(&cfg)
|
||
+ .expect("discovery should succeed")
|
||
+ .expect("path should be found");
|
||
+ let discovered_canon = fs::canonicalize(&discovered).expect("canonicalize discovered");
|
||
+ let expected_canon =
|
||
+ fs::canonicalize(nested.join("AGENTS.md")).expect("canonicalize expected");
|
||
+ assert_eq!(discovered_canon, expected_canon);
|
||
+
|
||
+ // get_user_instructions should load the nested document contents.
|
||
+ let res = get_user_instructions(&cfg).await.expect("doc expected");
|
||
+ assert_eq!(res, "nested cwd doc");
|
||
+ }
|
||
+
|
||
+ /// Test if AGENTS.md at the repo root is used when none exists in cwd.
|
||
+ #[tokio::test]
|
||
+ async fn falls_back_to_repo_root_when_cwd_missing_doc() {
|
||
+ let repo = tempfile::tempdir().expect("tempdir");
|
||
+
|
||
+ // Simulate a git repository at repo root.
|
||
+ std::fs::write(repo.path().join(".git"), "gitdir: /dev/null\n").unwrap();
|
||
+
|
||
+ // Create AGENTS.md only at repo root.
|
||
+ fs::write(repo.path().join("AGENTS.md"), "root level doc").unwrap();
|
||
+
|
||
+ // Nested cwd without its own AGENTS.md.
|
||
+ let nested = repo.path().join("nested/dir");
|
||
+ std::fs::create_dir_all(&nested).unwrap();
|
||
+
|
||
+ let mut cfg = make_config(&repo, 4096, None);
|
||
+ cfg.cwd = nested;
|
||
+
|
||
+ let discovered = super::discover_project_doc_path(&cfg)
|
||
+ .expect("discovery should succeed")
|
||
+ .expect("path should be found");
|
||
+ let discovered_canon = fs::canonicalize(&discovered).expect("canonicalize discovered");
|
||
+ let expected_canon =
|
||
+ fs::canonicalize(repo.path().join("AGENTS.md")).expect("canonicalize expected");
|
||
+ assert_eq!(discovered_canon, expected_canon);
|
||
+
|
||
+ let res = get_user_instructions(&cfg).await.expect("doc expected");
|
||
+ assert_eq!(res, "root level doc");
|
||
+ }
|
||
+
|
||
/// Explicitly setting the byte-limit to zero disables project docs.
|
||
#[tokio::test]
|
||
async fn zero_byte_limit_disables_docs() {
|
||
diff --git a/codex-rs/core/src/protocol.rs b/codex-rs/core/src/protocol.rs
|
||
index 052806dd97..b861bda7e8 100644
|
||
--- a/codex-rs/core/src/protocol.rs
|
||
+++ b/codex-rs/core/src/protocol.rs
|
||
@@ -648,6 +648,10 @@ pub struct SessionConfiguredEvent {
|
||
/// Tell the client what model is being queried.
|
||
pub model: String,
|
||
|
||
+ /// The path(s) to AGENTS.md that were loaded for this session, if any.
|
||
+ #[serde(skip_serializing_if = "Option::is_none")]
|
||
+ pub agents_doc_path: Option<String>,
|
||
+
|
||
/// Identifier of the history log file (inode on Unix, 0 otherwise).
|
||
pub history_log_id: u64,
|
||
|
||
@@ -713,6 +717,7 @@ mod tests {
|
||
msg: EventMsg::SessionConfigured(SessionConfiguredEvent {
|
||
session_id,
|
||
model: "codex-mini-latest".to_string(),
|
||
+ agents_doc_path: None,
|
||
history_log_id: 0,
|
||
history_entry_count: 0,
|
||
}),
|
||
diff --git a/codex-rs/exec/src/event_processor_with_human_output.rs b/codex-rs/exec/src/event_processor_with_human_output.rs
|
||
index 6b03ed7882..2e4a206357 100644
|
||
--- a/codex-rs/exec/src/event_processor_with_human_output.rs
|
||
+++ b/codex-rs/exec/src/event_processor_with_human_output.rs
|
||
@@ -494,6 +494,7 @@ impl EventProcessor for EventProcessorWithHumanOutput {
|
||
model,
|
||
history_log_id: _,
|
||
history_entry_count: _,
|
||
+ ..
|
||
} = session_configured_event;
|
||
|
||
ts_println!(
|
||
diff --git a/codex-rs/mcp-server/src/mcp_protocol.rs b/codex-rs/mcp-server/src/mcp_protocol.rs
|
||
index 2f8858a37b..8104d4ac3e 100644
|
||
--- a/codex-rs/mcp-server/src/mcp_protocol.rs
|
||
+++ b/codex-rs/mcp-server/src/mcp_protocol.rs
|
||
@@ -906,6 +906,7 @@ mod tests {
|
||
msg: EventMsg::SessionConfigured(codex_core::protocol::SessionConfiguredEvent {
|
||
session_id: uuid!("67e55044-10b1-426f-9247-bb680e5fe0c8"),
|
||
model: "codex-mini-latest".into(),
|
||
+ agents_doc_path: None,
|
||
history_log_id: 42,
|
||
history_entry_count: 3,
|
||
}),
|
||
diff --git a/codex-rs/mcp-server/src/outgoing_message.rs b/codex-rs/mcp-server/src/outgoing_message.rs
|
||
index e7b0b9b63c..74079d3169 100644
|
||
--- a/codex-rs/mcp-server/src/outgoing_message.rs
|
||
+++ b/codex-rs/mcp-server/src/outgoing_message.rs
|
||
@@ -242,6 +242,7 @@ mod tests {
|
||
msg: EventMsg::SessionConfigured(SessionConfiguredEvent {
|
||
session_id: Uuid::new_v4(),
|
||
model: "gpt-4o".to_string(),
|
||
+ agents_doc_path: None,
|
||
history_log_id: 1,
|
||
history_entry_count: 1000,
|
||
}),
|
||
@@ -282,6 +283,7 @@ mod tests {
|
||
let session_configured_event = SessionConfiguredEvent {
|
||
session_id: Uuid::new_v4(),
|
||
model: "gpt-4o".to_string(),
|
||
+ agents_doc_path: None,
|
||
history_log_id: 1,
|
||
history_entry_count: 1000,
|
||
};
|
||
diff --git a/codex-rs/tui/src/history_cell.rs b/codex-rs/tui/src/history_cell.rs
|
||
index c577ce17a0..366046eca5 100644
|
||
--- a/codex-rs/tui/src/history_cell.rs
|
||
+++ b/codex-rs/tui/src/history_cell.rs
|
||
@@ -166,12 +166,8 @@ impl HistoryCell {
|
||
event: SessionConfiguredEvent,
|
||
is_first_event: bool,
|
||
) -> Self {
|
||
- let SessionConfiguredEvent {
|
||
- model,
|
||
- session_id: _,
|
||
- history_log_id: _,
|
||
- history_entry_count: _,
|
||
- } = event;
|
||
+ let model = event.model.clone();
|
||
+ let agents_doc_path = event.agents_doc_path.clone();
|
||
if is_first_event {
|
||
let cwd_str = match relativize_to_home(&config.cwd) {
|
||
Some(rel) if !rel.as_os_str().is_empty() => format!("~/{}", rel.display()),
|
||
@@ -179,7 +175,7 @@ impl HistoryCell {
|
||
None => config.cwd.display().to_string(),
|
||
};
|
||
|
||
- let lines: Vec<Line<'static>> = vec![
|
||
+ let mut lines: Vec<Line<'static>> = vec![
|
||
Line::from(vec![
|
||
Span::raw(">_ ").dim(),
|
||
Span::styled(
|
||
@@ -189,14 +185,78 @@ impl HistoryCell {
|
||
Span::raw(format!(" {cwd_str}")).dim(),
|
||
]),
|
||
Line::from("".dim()),
|
||
- Line::from(" Try one of the following commands to get started:".dim()),
|
||
- Line::from("".dim()),
|
||
- Line::from(format!(" 1. /init - {}", SlashCommand::Init.description()).dim()),
|
||
- Line::from(format!(" 2. /status - {}", SlashCommand::Status.description()).dim()),
|
||
- Line::from(format!(" 3. /compact - {}", SlashCommand::Compact.description()).dim()),
|
||
- Line::from(format!(" 4. /new - {}", SlashCommand::New.description()).dim()),
|
||
- Line::from("".dim()),
|
||
];
|
||
+
|
||
+ // If AGENTS.md is configured (either user-level or project-level),
|
||
+ // show a concise summary and omit the /init hint, but still show the other onboarding commands.
|
||
+ let show_init = agents_doc_path.is_none();
|
||
+
|
||
+ if let Some(paths_str) = agents_doc_path {
|
||
+ let global_path = config.codex_home.join("AGENTS.md");
|
||
+ let parts: Vec<String> = paths_str
|
||
+ .split(" + ")
|
||
+ .map(|s| s.trim().to_string())
|
||
+ .filter(|s| !s.is_empty())
|
||
+ .collect();
|
||
+
|
||
+ let mut user_path: Option<String> = None;
|
||
+ let mut project_path: Option<String> = None;
|
||
+ for p in parts {
|
||
+ if p == global_path.display().to_string() {
|
||
+ user_path = Some(p);
|
||
+ } else {
|
||
+ project_path = Some(p);
|
||
+ }
|
||
+ }
|
||
+
|
||
+ let summary_line = match (user_path, project_path) {
|
||
+ (Some(u), Some(pr)) => {
|
||
+ format!(" Using user instructions ({u}) and project instructions ({pr})")
|
||
+ }
|
||
+ (Some(u), None) => format!("Using user instructions ({u})"),
|
||
+ (None, Some(pr)) => format!(" Using project instructions ({pr})"),
|
||
+ (None, None) => String::new(),
|
||
+ };
|
||
+
|
||
+ if !summary_line.is_empty() {
|
||
+ lines.push(Line::from(summary_line.dim()));
|
||
+ lines.push(Line::from("".dim()));
|
||
+ }
|
||
+ }
|
||
+
|
||
+ // Onboarding hints, with index based on whether /init is shown
|
||
+ lines.push(Line::from(
|
||
+ " Try one of the following commands to get started:".dim(),
|
||
+ ));
|
||
+ lines.push(Line::from("".dim()));
|
||
+
|
||
+ let mut cmd_index = 1;
|
||
+ if show_init {
|
||
+ lines.push(Line::from(
|
||
+ format!(" {cmd_index}. /init - {}", SlashCommand::Init.description()).dim(),
|
||
+ ));
|
||
+ cmd_index += 1;
|
||
+ }
|
||
+ lines.push(Line::from(
|
||
+ format!(
|
||
+ " {cmd_index}. /status - {}",
|
||
+ SlashCommand::Status.description()
|
||
+ )
|
||
+ .dim(),
|
||
+ ));
|
||
+ cmd_index += 1;
|
||
+ lines.push(Line::from(
|
||
+ format!(
|
||
+ " {cmd_index}. /compact - {}",
|
||
+ SlashCommand::Compact.description()
|
||
+ )
|
||
+ .dim(),
|
||
+ ));
|
||
+ cmd_index += 1;
|
||
+ lines.push(Line::from(
|
||
+ format!(" {cmd_index}. /new - {}", SlashCommand::New.description()).dim(),
|
||
+ ));
|
||
+ lines.push(Line::from("".dim()));
|
||
HistoryCell::WelcomeMessage {
|
||
view: TextBlock::new(lines),
|
||
}
|
||
```
|
||
|
||
## Review Comments
|
||
|
||
### codex-rs/core/src/project_doc.rs
|
||
|
||
- Created: 2025-07-30 16:10:19 UTC | Link: https://github.com/openai/codex/pull/1730#discussion_r2243193435
|
||
|
||
```diff
|
||
@@ -90,48 +79,94 @@ async fn find_project_doc(config: &Config) -> std::io::Result<Option<String>> {
|
||
Ok(None)
|
||
}
|
||
|
||
-/// Attempt to load the first candidate file found in `dir`. Returns the file
|
||
-/// contents (truncated if it exceeds `max_bytes`) when successful.
|
||
-async fn load_first_candidate(
|
||
+fn find_non_empty_candidate(
|
||
dir: &Path,
|
||
names: &[&str],
|
||
max_bytes: usize,
|
||
-) -> std::io::Result<Option<String>> {
|
||
+) -> std::io::Result<Option<std::path::PathBuf>> {
|
||
+ use std::fs::File;
|
||
+ use std::io::Read;
|
||
+
|
||
for name in names {
|
||
let candidate = dir.join(name);
|
||
-
|
||
- let file = match tokio::fs::File::open(&candidate).await {
|
||
+ let mut file = match File::open(&candidate) {
|
||
Err(e) if e.kind() == std::io::ErrorKind::NotFound => continue,
|
||
Err(e) => return Err(e),
|
||
Ok(f) => f,
|
||
};
|
||
|
||
- let size = file.metadata().await?.len();
|
||
-
|
||
- let reader = tokio::io::BufReader::new(file);
|
||
- let mut data = Vec::with_capacity(std::cmp::min(size as usize, max_bytes));
|
||
- let mut limited = reader.take(max_bytes as u64);
|
||
- limited.read_to_end(&mut data).await?;
|
||
-
|
||
- if size as usize > max_bytes {
|
||
- tracing::warn!(
|
||
- "Project doc `{}` exceeds {max_bytes} bytes - truncating.",
|
||
- candidate.display(),
|
||
- );
|
||
- }
|
||
-
|
||
- let contents = String::from_utf8_lossy(&data).to_string();
|
||
+ let size = file.metadata()?.len() as usize;
|
||
+ let to_read = std::cmp::min(size, max_bytes);
|
||
+ let mut data = vec![0u8; to_read];
|
||
+ let read_n = file.read(&mut data)?;
|
||
+ let contents = String::from_utf8_lossy(&data[..read_n]).to_string();
|
||
if contents.trim().is_empty() {
|
||
- // Empty file – treat as not found.
|
||
continue;
|
||
}
|
||
|
||
- return Ok(Some(contents));
|
||
+ return Ok(Some(candidate));
|
||
}
|
||
|
||
Ok(None)
|
||
}
|
||
|
||
+/// Combines `Config::instructions` and `AGENTS.md` (if present) into a single
|
||
+/// string of instructions.
|
||
+pub(crate) async fn get_user_instructions(config: &Config) -> Option<String> {
|
||
+ match find_project_doc(config).await {
|
||
+ Ok(Some(project_doc)) => match &config.user_instructions {
|
||
+ Some(original_instructions) => Some(format!(
|
||
+ "{original_instructions}{PROJECT_DOC_SEPARATOR}{project_doc}"
|
||
+ )),
|
||
+ None => Some(project_doc),
|
||
+ },
|
||
+ Ok(None) => config.user_instructions.clone(),
|
||
+ Err(e) => {
|
||
+ error!("error trying to find project doc: {e:#}");
|
||
+ config.user_instructions.clone()
|
||
+ }
|
||
+ }
|
||
+}
|
||
+
|
||
+/// Attempt to locate and load the project documentation. Currently, the search
|
||
+/// starts from `Config::cwd`, but if we may want to consider other directories
|
||
+/// in the future, e.g., additional writable directories in the `SandboxPolicy`.
|
||
+///
|
||
+/// On success returns `Ok(Some(contents))`. If no documentation file is found
|
||
+/// the function returns `Ok(None)`. Unexpected I/O failures bubble up as
|
||
+/// `Err` so callers can decide how to handle them.
|
||
+async fn find_project_doc(config: &Config) -> std::io::Result<Option<String>> {
|
||
+ use tokio::io::BufReader;
|
||
```
|
||
|
||
> Please move to the top of the file.
|
||
|
||
- Created: 2025-07-30 16:11:21 UTC | Link: https://github.com/openai/codex/pull/1730#discussion_r2243195675
|
||
|
||
```diff
|
||
@@ -90,48 +79,94 @@ async fn find_project_doc(config: &Config) -> std::io::Result<Option<String>> {
|
||
Ok(None)
|
||
}
|
||
|
||
-/// Attempt to load the first candidate file found in `dir`. Returns the file
|
||
-/// contents (truncated if it exceeds `max_bytes`) when successful.
|
||
-async fn load_first_candidate(
|
||
+fn find_non_empty_candidate(
|
||
dir: &Path,
|
||
names: &[&str],
|
||
max_bytes: usize,
|
||
-) -> std::io::Result<Option<String>> {
|
||
+) -> std::io::Result<Option<std::path::PathBuf>> {
|
||
+ use std::fs::File;
|
||
```
|
||
|
||
> Move these to the top.
|
||
|
||
- Created: 2025-07-30 16:15:39 UTC | Link: https://github.com/openai/codex/pull/1730#discussion_r2243205576
|
||
|
||
```diff
|
||
@@ -23,65 +23,54 @@ const CANDIDATE_FILENAMES: &[&str] = &["AGENTS.md"];
|
||
/// be concatenated with the following separator.
|
||
const PROJECT_DOC_SEPARATOR: &str = "\n\n--- project-doc ---\n\n";
|
||
|
||
-/// Combines `Config::instructions` and `AGENTS.md` (if present) into a single
|
||
-/// string of instructions.
|
||
-pub(crate) async fn get_user_instructions(config: &Config) -> Option<String> {
|
||
- match find_project_doc(config).await {
|
||
- Ok(Some(project_doc)) => match &config.user_instructions {
|
||
- Some(original_instructions) => Some(format!(
|
||
- "{original_instructions}{PROJECT_DOC_SEPARATOR}{project_doc}"
|
||
- )),
|
||
- None => Some(project_doc),
|
||
- },
|
||
- Ok(None) => config.user_instructions.clone(),
|
||
- Err(e) => {
|
||
- error!("error trying to find project doc: {e:#}");
|
||
- config.user_instructions.clone()
|
||
- }
|
||
+/// Public helper that returns the discovered AGENTS.md path.
|
||
+/// Returns `Ok(None)` when no suitable file is found or
|
||
+/// `project_doc_max_bytes == 0`.
|
||
+pub fn discover_project_doc_path(config: &Config) -> std::io::Result<Option<std::path::PathBuf>> {
|
||
+ if config.project_doc_max_bytes == 0 {
|
||
+ return Ok(None);
|
||
}
|
||
-}
|
||
|
||
-/// Attempt to locate and load the project documentation. Currently, the search
|
||
-/// starts from `Config::cwd`, but if we may want to consider other directories
|
||
-/// in the future, e.g., additional writable directories in the `SandboxPolicy`.
|
||
-///
|
||
-/// On success returns `Ok(Some(contents))`. If no documentation file is found
|
||
-/// the function returns `Ok(None)`. Unexpected I/O failures bubble up as
|
||
-/// `Err` so callers can decide how to handle them.
|
||
-async fn find_project_doc(config: &Config) -> std::io::Result<Option<String>> {
|
||
- let max_bytes = config.project_doc_max_bytes;
|
||
-
|
||
- // Attempt to load from the working directory first.
|
||
- if let Some(doc) = load_first_candidate(&config.cwd, CANDIDATE_FILENAMES, max_bytes).await? {
|
||
- return Ok(Some(doc));
|
||
- }
|
||
+ discover_project_doc_path_from_dir(
|
||
+ &config.cwd,
|
||
+ CANDIDATE_FILENAMES,
|
||
+ config.project_doc_max_bytes,
|
||
+ )
|
||
```
|
||
|
||
> Arguably, using an expression is more idiomatic (applies to lines 30-38, but GitHub wouldn't let me select it). Also, I would take this opportunity to list the more common case first.
|
||
>
|
||
> ```suggestion
|
||
> if config.project_doc_max_bytes > 0 {
|
||
> discover_project_doc_path_from_dir(
|
||
> &config.cwd,
|
||
> CANDIDATE_FILENAMES,
|
||
> config.project_doc_max_bytes,
|
||
> )
|
||
> } else {
|
||
> Ok(None)
|
||
> }
|
||
> ```
|
||
|
||
- Created: 2025-07-30 16:16:27 UTC | Link: https://github.com/openai/codex/pull/1730#discussion_r2243207508
|
||
|
||
```diff
|
||
@@ -23,65 +23,54 @@ const CANDIDATE_FILENAMES: &[&str] = &["AGENTS.md"];
|
||
/// be concatenated with the following separator.
|
||
const PROJECT_DOC_SEPARATOR: &str = "\n\n--- project-doc ---\n\n";
|
||
|
||
-/// Combines `Config::instructions` and `AGENTS.md` (if present) into a single
|
||
-/// string of instructions.
|
||
-pub(crate) async fn get_user_instructions(config: &Config) -> Option<String> {
|
||
- match find_project_doc(config).await {
|
||
- Ok(Some(project_doc)) => match &config.user_instructions {
|
||
- Some(original_instructions) => Some(format!(
|
||
- "{original_instructions}{PROJECT_DOC_SEPARATOR}{project_doc}"
|
||
- )),
|
||
- None => Some(project_doc),
|
||
- },
|
||
- Ok(None) => config.user_instructions.clone(),
|
||
- Err(e) => {
|
||
- error!("error trying to find project doc: {e:#}");
|
||
- config.user_instructions.clone()
|
||
- }
|
||
+/// Public helper that returns the discovered AGENTS.md path.
|
||
+/// Returns `Ok(None)` when no suitable file is found or
|
||
+/// `project_doc_max_bytes == 0`.
|
||
+pub fn discover_project_doc_path(config: &Config) -> std::io::Result<Option<std::path::PathBuf>> {
|
||
+ if config.project_doc_max_bytes == 0 {
|
||
+ return Ok(None);
|
||
}
|
||
-}
|
||
|
||
-/// Attempt to locate and load the project documentation. Currently, the search
|
||
-/// starts from `Config::cwd`, but if we may want to consider other directories
|
||
-/// in the future, e.g., additional writable directories in the `SandboxPolicy`.
|
||
-///
|
||
-/// On success returns `Ok(Some(contents))`. If no documentation file is found
|
||
-/// the function returns `Ok(None)`. Unexpected I/O failures bubble up as
|
||
-/// `Err` so callers can decide how to handle them.
|
||
-async fn find_project_doc(config: &Config) -> std::io::Result<Option<String>> {
|
||
- let max_bytes = config.project_doc_max_bytes;
|
||
-
|
||
- // Attempt to load from the working directory first.
|
||
- if let Some(doc) = load_first_candidate(&config.cwd, CANDIDATE_FILENAMES, max_bytes).await? {
|
||
- return Ok(Some(doc));
|
||
- }
|
||
+ discover_project_doc_path_from_dir(
|
||
+ &config.cwd,
|
||
+ CANDIDATE_FILENAMES,
|
||
+ config.project_doc_max_bytes,
|
||
+ )
|
||
+}
|
||
|
||
- // Walk up towards the filesystem root, stopping once we encounter the Git
|
||
- // repository root. The presence of **either** a `.git` *file* or
|
||
- // *directory* counts.
|
||
- let mut dir = config.cwd.clone();
|
||
+fn discover_project_doc_path_from_dir(
|
||
+ start_dir: &Path,
|
||
+ names: &[&str],
|
||
+ max_bytes: usize,
|
||
+) -> std::io::Result<Option<std::path::PathBuf>> {
|
||
+ use std::fs;
|
||
```
|
||
|
||
> to top
|
||
|
||
- Created: 2025-07-30 16:17:51 UTC | Link: https://github.com/openai/codex/pull/1730#discussion_r2243211102
|
||
|
||
```diff
|
||
@@ -23,65 +23,54 @@ const CANDIDATE_FILENAMES: &[&str] = &["AGENTS.md"];
|
||
/// be concatenated with the following separator.
|
||
const PROJECT_DOC_SEPARATOR: &str = "\n\n--- project-doc ---\n\n";
|
||
|
||
-/// Combines `Config::instructions` and `AGENTS.md` (if present) into a single
|
||
-/// string of instructions.
|
||
-pub(crate) async fn get_user_instructions(config: &Config) -> Option<String> {
|
||
- match find_project_doc(config).await {
|
||
- Ok(Some(project_doc)) => match &config.user_instructions {
|
||
- Some(original_instructions) => Some(format!(
|
||
- "{original_instructions}{PROJECT_DOC_SEPARATOR}{project_doc}"
|
||
- )),
|
||
- None => Some(project_doc),
|
||
- },
|
||
- Ok(None) => config.user_instructions.clone(),
|
||
- Err(e) => {
|
||
- error!("error trying to find project doc: {e:#}");
|
||
- config.user_instructions.clone()
|
||
- }
|
||
+/// Public helper that returns the discovered AGENTS.md path.
|
||
+/// Returns `Ok(None)` when no suitable file is found or
|
||
+/// `project_doc_max_bytes == 0`.
|
||
+pub fn discover_project_doc_path(config: &Config) -> std::io::Result<Option<std::path::PathBuf>> {
|
||
+ if config.project_doc_max_bytes == 0 {
|
||
+ return Ok(None);
|
||
}
|
||
-}
|
||
|
||
-/// Attempt to locate and load the project documentation. Currently, the search
|
||
-/// starts from `Config::cwd`, but if we may want to consider other directories
|
||
-/// in the future, e.g., additional writable directories in the `SandboxPolicy`.
|
||
-///
|
||
-/// On success returns `Ok(Some(contents))`. If no documentation file is found
|
||
-/// the function returns `Ok(None)`. Unexpected I/O failures bubble up as
|
||
-/// `Err` so callers can decide how to handle them.
|
||
-async fn find_project_doc(config: &Config) -> std::io::Result<Option<String>> {
|
||
- let max_bytes = config.project_doc_max_bytes;
|
||
-
|
||
- // Attempt to load from the working directory first.
|
||
- if let Some(doc) = load_first_candidate(&config.cwd, CANDIDATE_FILENAMES, max_bytes).await? {
|
||
- return Ok(Some(doc));
|
||
- }
|
||
+ discover_project_doc_path_from_dir(
|
||
+ &config.cwd,
|
||
+ CANDIDATE_FILENAMES,
|
||
+ config.project_doc_max_bytes,
|
||
+ )
|
||
+}
|
||
|
||
- // Walk up towards the filesystem root, stopping once we encounter the Git
|
||
- // repository root. The presence of **either** a `.git` *file* or
|
||
- // *directory* counts.
|
||
- let mut dir = config.cwd.clone();
|
||
+fn discover_project_doc_path_from_dir(
|
||
+ start_dir: &Path,
|
||
+ names: &[&str],
|
||
+ max_bytes: usize,
|
||
+) -> std::io::Result<Option<std::path::PathBuf>> {
|
||
+ use std::fs;
|
||
|
||
// Canonicalize the path so that we do not end up in an infinite loop when
|
||
// `cwd` contains `..` components.
|
||
```
|
||
|
||
> ```suggestion
|
||
> // `start_dir` contains `..` components.
|
||
> ```
|
||
|
||
- Created: 2025-07-30 16:18:41 UTC | Link: https://github.com/openai/codex/pull/1730#discussion_r2243214469
|
||
|
||
```diff
|
||
@@ -23,65 +23,54 @@ const CANDIDATE_FILENAMES: &[&str] = &["AGENTS.md"];
|
||
/// be concatenated with the following separator.
|
||
const PROJECT_DOC_SEPARATOR: &str = "\n\n--- project-doc ---\n\n";
|
||
|
||
-/// Combines `Config::instructions` and `AGENTS.md` (if present) into a single
|
||
-/// string of instructions.
|
||
-pub(crate) async fn get_user_instructions(config: &Config) -> Option<String> {
|
||
- match find_project_doc(config).await {
|
||
- Ok(Some(project_doc)) => match &config.user_instructions {
|
||
- Some(original_instructions) => Some(format!(
|
||
- "{original_instructions}{PROJECT_DOC_SEPARATOR}{project_doc}"
|
||
- )),
|
||
- None => Some(project_doc),
|
||
- },
|
||
- Ok(None) => config.user_instructions.clone(),
|
||
- Err(e) => {
|
||
- error!("error trying to find project doc: {e:#}");
|
||
- config.user_instructions.clone()
|
||
- }
|
||
+/// Public helper that returns the discovered AGENTS.md path.
|
||
+/// Returns `Ok(None)` when no suitable file is found or
|
||
+/// `project_doc_max_bytes == 0`.
|
||
+pub fn discover_project_doc_path(config: &Config) -> std::io::Result<Option<std::path::PathBuf>> {
|
||
+ if config.project_doc_max_bytes == 0 {
|
||
+ return Ok(None);
|
||
}
|
||
-}
|
||
|
||
-/// Attempt to locate and load the project documentation. Currently, the search
|
||
-/// starts from `Config::cwd`, but if we may want to consider other directories
|
||
-/// in the future, e.g., additional writable directories in the `SandboxPolicy`.
|
||
-///
|
||
-/// On success returns `Ok(Some(contents))`. If no documentation file is found
|
||
-/// the function returns `Ok(None)`. Unexpected I/O failures bubble up as
|
||
-/// `Err` so callers can decide how to handle them.
|
||
-async fn find_project_doc(config: &Config) -> std::io::Result<Option<String>> {
|
||
- let max_bytes = config.project_doc_max_bytes;
|
||
-
|
||
- // Attempt to load from the working directory first.
|
||
- if let Some(doc) = load_first_candidate(&config.cwd, CANDIDATE_FILENAMES, max_bytes).await? {
|
||
- return Ok(Some(doc));
|
||
- }
|
||
+ discover_project_doc_path_from_dir(
|
||
+ &config.cwd,
|
||
+ CANDIDATE_FILENAMES,
|
||
+ config.project_doc_max_bytes,
|
||
+ )
|
||
+}
|
||
|
||
- // Walk up towards the filesystem root, stopping once we encounter the Git
|
||
- // repository root. The presence of **either** a `.git` *file* or
|
||
- // *directory* counts.
|
||
- let mut dir = config.cwd.clone();
|
||
+fn discover_project_doc_path_from_dir(
|
||
+ start_dir: &Path,
|
||
+ names: &[&str],
|
||
+ max_bytes: usize,
|
||
+) -> std::io::Result<Option<std::path::PathBuf>> {
|
||
+ use std::fs;
|
||
|
||
// Canonicalize the path so that we do not end up in an infinite loop when
|
||
// `cwd` contains `..` components.
|
||
```
|
||
|
||
> Though I assume I wrote this comment, `Config.cwd` should not have `..` components, but maybe it could if you use `--cd`...?
|
||
|
||
- Created: 2025-07-30 16:23:57 UTC | Link: https://github.com/openai/codex/pull/1730#discussion_r2243239863
|
||
|
||
```diff
|
||
@@ -23,65 +23,54 @@ const CANDIDATE_FILENAMES: &[&str] = &["AGENTS.md"];
|
||
/// be concatenated with the following separator.
|
||
const PROJECT_DOC_SEPARATOR: &str = "\n\n--- project-doc ---\n\n";
|
||
|
||
-/// Combines `Config::instructions` and `AGENTS.md` (if present) into a single
|
||
-/// string of instructions.
|
||
-pub(crate) async fn get_user_instructions(config: &Config) -> Option<String> {
|
||
- match find_project_doc(config).await {
|
||
- Ok(Some(project_doc)) => match &config.user_instructions {
|
||
- Some(original_instructions) => Some(format!(
|
||
- "{original_instructions}{PROJECT_DOC_SEPARATOR}{project_doc}"
|
||
- )),
|
||
- None => Some(project_doc),
|
||
- },
|
||
- Ok(None) => config.user_instructions.clone(),
|
||
- Err(e) => {
|
||
- error!("error trying to find project doc: {e:#}");
|
||
- config.user_instructions.clone()
|
||
- }
|
||
+/// Public helper that returns the discovered AGENTS.md path.
|
||
+/// Returns `Ok(None)` when no suitable file is found or
|
||
+/// `project_doc_max_bytes == 0`.
|
||
+pub fn discover_project_doc_path(config: &Config) -> std::io::Result<Option<std::path::PathBuf>> {
|
||
+ if config.project_doc_max_bytes == 0 {
|
||
+ return Ok(None);
|
||
}
|
||
-}
|
||
|
||
-/// Attempt to locate and load the project documentation. Currently, the search
|
||
-/// starts from `Config::cwd`, but if we may want to consider other directories
|
||
-/// in the future, e.g., additional writable directories in the `SandboxPolicy`.
|
||
-///
|
||
-/// On success returns `Ok(Some(contents))`. If no documentation file is found
|
||
-/// the function returns `Ok(None)`. Unexpected I/O failures bubble up as
|
||
-/// `Err` so callers can decide how to handle them.
|
||
-async fn find_project_doc(config: &Config) -> std::io::Result<Option<String>> {
|
||
- let max_bytes = config.project_doc_max_bytes;
|
||
-
|
||
- // Attempt to load from the working directory first.
|
||
- if let Some(doc) = load_first_candidate(&config.cwd, CANDIDATE_FILENAMES, max_bytes).await? {
|
||
- return Ok(Some(doc));
|
||
- }
|
||
+ discover_project_doc_path_from_dir(
|
||
+ &config.cwd,
|
||
+ CANDIDATE_FILENAMES,
|
||
+ config.project_doc_max_bytes,
|
||
+ )
|
||
+}
|
||
|
||
- // Walk up towards the filesystem root, stopping once we encounter the Git
|
||
- // repository root. The presence of **either** a `.git` *file* or
|
||
- // *directory* counts.
|
||
- let mut dir = config.cwd.clone();
|
||
+fn discover_project_doc_path_from_dir(
|
||
```
|
||
|
||
> Please make this function `async` and use the `tokio` async equivalents of filesystem operations.
|
||
|
||
- Created: 2025-07-30 16:26:41 UTC | Link: https://github.com/openai/codex/pull/1730#discussion_r2243249154
|
||
|
||
```diff
|
||
@@ -90,48 +79,94 @@ async fn find_project_doc(config: &Config) -> std::io::Result<Option<String>> {
|
||
Ok(None)
|
||
}
|
||
|
||
-/// Attempt to load the first candidate file found in `dir`. Returns the file
|
||
-/// contents (truncated if it exceeds `max_bytes`) when successful.
|
||
-async fn load_first_candidate(
|
||
+fn find_non_empty_candidate(
|
||
dir: &Path,
|
||
names: &[&str],
|
||
max_bytes: usize,
|
||
-) -> std::io::Result<Option<String>> {
|
||
+) -> std::io::Result<Option<std::path::PathBuf>> {
|
||
+ use std::fs::File;
|
||
+ use std::io::Read;
|
||
+
|
||
for name in names {
|
||
let candidate = dir.join(name);
|
||
-
|
||
- let file = match tokio::fs::File::open(&candidate).await {
|
||
+ let mut file = match File::open(&candidate) {
|
||
Err(e) if e.kind() == std::io::ErrorKind::NotFound => continue,
|
||
Err(e) => return Err(e),
|
||
Ok(f) => f,
|
||
};
|
||
|
||
- let size = file.metadata().await?.len();
|
||
-
|
||
- let reader = tokio::io::BufReader::new(file);
|
||
- let mut data = Vec::with_capacity(std::cmp::min(size as usize, max_bytes));
|
||
- let mut limited = reader.take(max_bytes as u64);
|
||
- limited.read_to_end(&mut data).await?;
|
||
-
|
||
- if size as usize > max_bytes {
|
||
- tracing::warn!(
|
||
- "Project doc `{}` exceeds {max_bytes} bytes - truncating.",
|
||
- candidate.display(),
|
||
- );
|
||
- }
|
||
-
|
||
- let contents = String::from_utf8_lossy(&data).to_string();
|
||
+ let size = file.metadata()?.len() as usize;
|
||
```
|
||
|
||
> Use `BufReader` as before? From chat:
|
||
>
|
||
> ```
|
||
> use tokio::io::{self, AsyncReadExt, BufReader};
|
||
> use tokio::fs::File;
|
||
>
|
||
> #[tokio::main]
|
||
> async fn main() -> io::Result<()> {
|
||
> let file = File::open("example.txt").await?;
|
||
> let mut reader = BufReader::new(file);
|
||
>
|
||
> let n = 10;
|
||
> let mut buf = vec![0u8; n];
|
||
> let bytes_read = reader.read(&mut buf).await?;
|
||
> buf.truncate(bytes_read);
|
||
>
|
||
> println!("Read {} bytes: {:?}", bytes_read, buf);
|
||
> Ok(())
|
||
> }
|
||
> ```
|
||
|
||
- Created: 2025-07-30 16:31:01 UTC | Link: https://github.com/openai/codex/pull/1730#discussion_r2243262881
|
||
|
||
```diff
|
||
@@ -90,48 +79,94 @@ async fn find_project_doc(config: &Config) -> std::io::Result<Option<String>> {
|
||
Ok(None)
|
||
}
|
||
|
||
-/// Attempt to load the first candidate file found in `dir`. Returns the file
|
||
-/// contents (truncated if it exceeds `max_bytes`) when successful.
|
||
-async fn load_first_candidate(
|
||
+fn find_non_empty_candidate(
|
||
dir: &Path,
|
||
names: &[&str],
|
||
max_bytes: usize,
|
||
-) -> std::io::Result<Option<String>> {
|
||
+) -> std::io::Result<Option<std::path::PathBuf>> {
|
||
+ use std::fs::File;
|
||
+ use std::io::Read;
|
||
+
|
||
for name in names {
|
||
let candidate = dir.join(name);
|
||
-
|
||
- let file = match tokio::fs::File::open(&candidate).await {
|
||
+ let mut file = match File::open(&candidate) {
|
||
Err(e) if e.kind() == std::io::ErrorKind::NotFound => continue,
|
||
Err(e) => return Err(e),
|
||
Ok(f) => f,
|
||
};
|
||
|
||
- let size = file.metadata().await?.len();
|
||
-
|
||
- let reader = tokio::io::BufReader::new(file);
|
||
- let mut data = Vec::with_capacity(std::cmp::min(size as usize, max_bytes));
|
||
- let mut limited = reader.take(max_bytes as u64);
|
||
- limited.read_to_end(&mut data).await?;
|
||
-
|
||
- if size as usize > max_bytes {
|
||
- tracing::warn!(
|
||
- "Project doc `{}` exceeds {max_bytes} bytes - truncating.",
|
||
- candidate.display(),
|
||
- );
|
||
- }
|
||
-
|
||
- let contents = String::from_utf8_lossy(&data).to_string();
|
||
+ let size = file.metadata()?.len() as usize;
|
||
+ let to_read = std::cmp::min(size, max_bytes);
|
||
+ let mut data = vec![0u8; to_read];
|
||
+ let read_n = file.read(&mut data)?;
|
||
```
|
||
|
||
> `read()` does not guarantee it fills the buffer:
|
||
>
|
||
> https://doc.rust-lang.org/std/io/trait.Read.html#tymethod.read
|
||
>
|
||
> `read_exact()` does that.
|
||
|
||
- Created: 2025-07-30 16:32:05 UTC | Link: https://github.com/openai/codex/pull/1730#discussion_r2243265577
|
||
|
||
```diff
|
||
@@ -90,48 +79,94 @@ async fn find_project_doc(config: &Config) -> std::io::Result<Option<String>> {
|
||
Ok(None)
|
||
}
|
||
|
||
-/// Attempt to load the first candidate file found in `dir`. Returns the file
|
||
-/// contents (truncated if it exceeds `max_bytes`) when successful.
|
||
-async fn load_first_candidate(
|
||
+fn find_non_empty_candidate(
|
||
dir: &Path,
|
||
names: &[&str],
|
||
max_bytes: usize,
|
||
-) -> std::io::Result<Option<String>> {
|
||
+) -> std::io::Result<Option<std::path::PathBuf>> {
|
||
+ use std::fs::File;
|
||
+ use std::io::Read;
|
||
+
|
||
for name in names {
|
||
let candidate = dir.join(name);
|
||
-
|
||
- let file = match tokio::fs::File::open(&candidate).await {
|
||
+ let mut file = match File::open(&candidate) {
|
||
Err(e) if e.kind() == std::io::ErrorKind::NotFound => continue,
|
||
Err(e) => return Err(e),
|
||
Ok(f) => f,
|
||
};
|
||
|
||
- let size = file.metadata().await?.len();
|
||
-
|
||
- let reader = tokio::io::BufReader::new(file);
|
||
- let mut data = Vec::with_capacity(std::cmp::min(size as usize, max_bytes));
|
||
- let mut limited = reader.take(max_bytes as u64);
|
||
- limited.read_to_end(&mut data).await?;
|
||
-
|
||
- if size as usize > max_bytes {
|
||
- tracing::warn!(
|
||
- "Project doc `{}` exceeds {max_bytes} bytes - truncating.",
|
||
- candidate.display(),
|
||
- );
|
||
- }
|
||
-
|
||
- let contents = String::from_utf8_lossy(&data).to_string();
|
||
+ let size = file.metadata()?.len() as usize;
|
||
+ let to_read = std::cmp::min(size, max_bytes);
|
||
+ let mut data = vec![0u8; to_read];
|
||
+ let read_n = file.read(&mut data)?;
|
||
+ let contents = String::from_utf8_lossy(&data[..read_n]).to_string();
|
||
if contents.trim().is_empty() {
|
||
- // Empty file – treat as not found.
|
||
continue;
|
||
}
|
||
|
||
- return Ok(Some(contents));
|
||
+ return Ok(Some(candidate));
|
||
}
|
||
|
||
Ok(None)
|
||
}
|
||
|
||
+/// Combines `Config::instructions` and `AGENTS.md` (if present) into a single
|
||
+/// string of instructions.
|
||
+pub(crate) async fn get_user_instructions(config: &Config) -> Option<String> {
|
||
+ match find_project_doc(config).await {
|
||
+ Ok(Some(project_doc)) => match &config.user_instructions {
|
||
+ Some(original_instructions) => Some(format!(
|
||
+ "{original_instructions}{PROJECT_DOC_SEPARATOR}{project_doc}"
|
||
+ )),
|
||
+ None => Some(project_doc),
|
||
+ },
|
||
+ Ok(None) => config.user_instructions.clone(),
|
||
+ Err(e) => {
|
||
+ error!("error trying to find project doc: {e:#}");
|
||
+ config.user_instructions.clone()
|
||
+ }
|
||
+ }
|
||
+}
|
||
+
|
||
+/// Attempt to locate and load the project documentation. Currently, the search
|
||
+/// starts from `Config::cwd`, but if we may want to consider other directories
|
||
+/// in the future, e.g., additional writable directories in the `SandboxPolicy`.
|
||
+///
|
||
+/// On success returns `Ok(Some(contents))`. If no documentation file is found
|
||
+/// the function returns `Ok(None)`. Unexpected I/O failures bubble up as
|
||
+/// `Err` so callers can decide how to handle them.
|
||
+async fn find_project_doc(config: &Config) -> std::io::Result<Option<String>> {
|
||
+ use tokio::io::BufReader;
|
||
+
|
||
+ let Some(path) = discover_project_doc_path(config)? else {
|
||
+ return Ok(None);
|
||
+ };
|
||
+
|
||
+ let max_bytes = config.project_doc_max_bytes;
|
||
```
|
||
|
||
> Feels like we should consolidate this with logic in `find_non_empty_candidate()`?
|
||
|
||
### codex-rs/tui/src/history_cell.rs
|
||
|
||
- Created: 2025-07-30 16:34:22 UTC | Link: https://github.com/openai/codex/pull/1730#discussion_r2243271535
|
||
|
||
```diff
|
||
@@ -176,8 +176,19 @@ impl HistoryCell {
|
||
]),
|
||
];
|
||
|
||
+ // Show which AGENTS.md is being used (or 'none' if unavailable).
|
||
+ let agents_value = if config.project_doc_max_bytes > 0 {
|
||
+ match codex_core::discover_project_doc_path(config) {
|
||
```
|
||
|
||
> Oh, is this why the function is sync instead of async?
|
||
|
||
- Created: 2025-07-30 16:36:12 UTC | Link: https://github.com/openai/codex/pull/1730#discussion_r2243276241
|
||
|
||
```diff
|
||
@@ -176,8 +176,19 @@ impl HistoryCell {
|
||
]),
|
||
];
|
||
|
||
+ // Show which AGENTS.md is being used (or 'none' if unavailable).
|
||
+ let agents_value = if config.project_doc_max_bytes > 0 {
|
||
+ match codex_core::discover_project_doc_path(config) {
|
||
```
|
||
|
||
> @nornagon-openai @easong-openai have you formed any opinions about introducing more `async` into the TUI?
|
||
|
||
- Created: 2025-08-06 19:35:55 UTC | Link: https://github.com/openai/codex/pull/1730#discussion_r2258095262
|
||
|
||
```diff
|
||
@@ -176,8 +176,19 @@ impl HistoryCell {
|
||
]),
|
||
];
|
||
|
||
+ // Show which AGENTS.md is being used (or 'none' if unavailable).
|
||
+ let agents_value = if config.project_doc_max_bytes > 0 {
|
||
+ match codex_core::discover_project_doc_path(config) {
|
||
```
|
||
|
||
> @dylan-hurd-oai is also looking into trying to get some Git info at startup and potentially on every conversation turn, so I think we need to figure out a way to leverage async better here. (Note `git_info.rs` is already doing some of this for use with rollouts.)
|
||
>
|
||
|
||
- Created: 2025-08-06 19:36:47 UTC | Link: https://github.com/openai/codex/pull/1730#discussion_r2258096688
|
||
|
||
```diff
|
||
@@ -176,8 +176,19 @@ impl HistoryCell {
|
||
]),
|
||
];
|
||
|
||
+ // Show which AGENTS.md is being used (or 'none' if unavailable).
|
||
+ let agents_value = if config.project_doc_max_bytes > 0 {
|
||
+ match codex_core::discover_project_doc_path(config) {
|
||
```
|
||
|
||
> Can/should this be sent as part of the `SessionConfiguredEvent`? |