Files
codex/prs/bolinfest/study/PR-1598-study.md
2025-09-02 15:17:45 -07:00

6.0 KiB
Raw Blame History

DOs

  • Extract Module (git_info.rs): Isolate Git logic in its own module and import from call sites.
// core/src/lib.rs
pub mod git_info;

// core/src/rollout.rs
use crate::git_info::{collect_git_info, GitInfo};
  • Order Exports Clearly: Put the primary async API right after the struct so the module's surface is obvious.
// core/src/git_info.rs
/// Git metadata for a working directory, as returned by `collect_git_info`.
///
/// Every field is optional, and a field that is `None` is left out of the
/// serialized output entirely (see the `skip_serializing_if` attributes).
#[derive(Serialize, Deserialize, Clone)]
pub struct GitInfo {
    /// Commit hash of the current checkout.
    /// NOTE(review): presumably the output of `git rev-parse HEAD` — confirm in `collect_git_info`.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub commit_hash: Option<String>,
    /// Current branch name.
    /// NOTE(review): presumably `None` for a detached HEAD — confirm in `collect_git_info`.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub branch: Option<String>,
    /// URL of the repository's remote.
    /// NOTE(review): presumably the `origin` remote — confirm in `collect_git_info`.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub repository_url: Option<String>,
}

pub async fn collect_git_info(cwd: &Path) -> Option<GitInfo> { /* ... */ }
  • Use Tokio + Timeout: Use tokio::process::Command and tokio::time::timeout with a 5s cap.
const GIT_COMMAND_TIMEOUT: TokioDuration = TokioDuration::from_secs(5);

/// Runs `git` with `args` in `cwd`, giving up after `GIT_COMMAND_TIMEOUT`.
///
/// Returns `None` if the timeout elapses or if the process cannot be spawned
/// or awaited; otherwise returns the captured output. The exit status is not
/// inspected here, so callers must check `Output::status` themselves.
async fn run_git_command_with_timeout(args: &[&str], cwd: &Path) -> Option<std::process::Output> {
    let mut command = Command::new("git");
    command
        .args(args)
        .current_dir(cwd)
        // Tokio does not kill a child when its future is dropped unless asked to;
        // without this, a timed-out `git` would keep running in the background.
        .kill_on_drop(true);

    match timeout(GIT_COMMAND_TIMEOUT, command.output()).await {
        Ok(Ok(output)) => Some(output),
        // Outer `Err`: the timeout elapsed. Inner `Err`: spawn or I/O failure.
        Ok(Err(_)) | Err(_) => None,
    }
}
  • Parallelize Git Calls: Run independent git commands concurrently.
let (commit, branch, url) = tokio::join!(
    run_git_command_with_timeout(&["rev-parse", "HEAD"], cwd),
    run_git_command_with_timeout(&["rev-parse", "--abbrev-ref", "HEAD"], cwd),
    run_git_command_with_timeout(&["remote", "get-url", "origin"], cwd),
);
  • Write Metadata In Writer Task: Compute git info inside rollout_writer and write meta+git before processing messages to avoid startup stalls.
/// Writer task for a rollout file.
///
/// If `meta` is provided, git info is collected for `cwd` and a single JSON
/// header line (session metadata plus git info) is written and flushed before
/// any command from `rx` is handled. Doing this here, rather than in the
/// caller, keeps the git lookup off the startup path.
async fn rollout_writer(
    mut file: tokio::fs::File,
    mut rx: mpsc::Receiver<RolloutCmd>,
    mut meta: Option<SessionMeta>,
    cwd: std::path::PathBuf,
) {
    if let Some(session_meta) = meta.take() {
        let payload = SessionMetaWithGit {
            meta: session_meta,
            git: collect_git_info(&cwd).await,
        };
        // Best-effort: serialization and write failures are deliberately ignored.
        if let Ok(mut header) = serde_json::to_string(&payload) {
            header.push('\n');
            let _ = file.write_all(header.as_bytes()).await;
            let _ = file.flush().await;
        }
    }

    while let Some(cmd) = rx.recv().await { /* ... */ }
}
  • Keep SessionMeta Immutable: Don't add a mutable/optional git field to it; use a wrapper with flatten.
/// Session metadata record. It intentionally carries no git field; git info
/// is attached at serialization time via `SessionMetaWithGit`.
#[derive(Serialize)]
pub struct SessionMeta { pub id: Uuid, pub timestamp: String, pub instructions: Option<String> }

/// Serialization-only wrapper that pairs a `SessionMeta` with optional git info.
#[derive(Serialize)]
struct SessionMetaWithGit {
    // `flatten` inlines the fields of `meta` into this struct's serialized form,
    // so they appear next to `git` instead of nested under a `meta` key.
    #[serde(flatten)]
    meta: SessionMeta,
    // Left out of the output entirely when no git info is available.
    #[serde(skip_serializing_if = "Option::is_none")]
    git: Option<GitInfo>,
}
  • Pass cwd Explicitly Where Needed: Require a PathBuf for writer/resume; avoid Option if it's always available.
/// Resumes recording to an existing rollout at `path`.
///
/// `cwd` is a required `PathBuf` that is handed straight to the writer task.
/// The writer is spawned with `None` for the session metadata, so it receives
/// no metadata to write on resume.
///
/// Returns the recorder handle together with the `SavedSession`.
/// NOTE(review): `file`, `rx`, `tx` and `saved` are set up in the elided part of the body.
pub async fn resume(path: &Path, cwd: PathBuf) -> io::Result<(Self, SavedSession)> {
    // ...
    tokio::task::spawn(rollout_writer(tokio::fs::File::from_std(file), rx, None, cwd));
    Ok((Self { tx }, saved))
}
  • Use Config.cwd When Available: In constructors where Config is present, use config.cwd.clone() instead of plumbing extra params.
let cwd = config.cwd.clone();
tokio::task::spawn(rollout_writer(tokio_file, rx, Some(meta), cwd));
  • Omit Empty Fields: Use #[serde(skip_serializing_if = "Option::is_none")] for optional metadata to keep rollouts tidy.
#[derive(Serialize)]
struct GitInfo { #[serde(skip_serializing_if = "Option::is_none")] repository_url: Option<String> /* ... */ }
  • Stabilize Tests With Threads (if needed): Prefer adding worker threads over disabling features.
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
async fn integration_git_info_unit_test() { /* ... */ }
  • Prefer Config Over Env Flags: If you must toggle in tests, add an experimental_* config knob.
// Pseudocode
if config.experimental_collect_git_metadata.unwrap_or(true) {
    git = collect_git_info(&cwd).await;
}

DON'Ts

  • Don't Block Startup: Avoid awaiting collect_git_info() inside RolloutRecorder::new(). Compute it in the writer task instead.
// ❌ Anti-pattern
let git = collect_git_info(&config.cwd).await; // blocks startup
recorder.record_item(&with_git(meta, git)).await?;
  • Don't Mutate SessionMeta: Don't add git: Option<...> directly to SessionMeta; keep it stable and wrap it.
// ❌ Anti-pattern
#[derive(Serialize)]
struct SessionMeta { /* ... */ git: Option<GitInfo> } // introduces mutability/optionality
  • Don't Introduce New Env Flags: Avoid CODEX_DISABLE_GIT_INFO-style switches; use config or runtime settings.
// ❌ Anti-pattern
if std::env::var("CODEX_DISABLE_GIT_INFO").is_ok() { /* ... */ }
  • Don't Use std::process::Command In Async Paths: It blocks; use Tokio's async process API.
// ❌ Anti-pattern
std::process::Command::new("git").args(args).output().unwrap(); // blocking
  • Don't Run Git Commands Serially: Parallelize with tokio::join! instead of sequential awaits.
// ❌ Anti-pattern
let a = run_git(...).await;
let b = run_git(...).await; // slower, serial
  • Don't Use Option<PathBuf> For Required cwd: If cwd always exists, make it a required parameter.
// ❌ Anti-pattern
fn rollout_writer(..., cwd: Option<PathBuf>) { /* ... */ }
  • Don't Leave Dead/Trailing Code: Remove leftover blocks and unused helpers after refactors.
// ❌ Anti-pattern
// stray block or unused fn lingering at end of file
  • Don't Treat Detached HEAD As A Branch: Map "HEAD" to None to reflect detached state.
let branch = String::from_utf8(out.stdout).ok().map(|s| s.trim().to_string());
let branch = branch.filter(|b| b != "HEAD");
  • Don't Use Overly Aggressive Timeouts: Avoid too-short caps that flake on large repos; prefer ~5s with async.
// ✅ Use 5s cap as a reasonable default
const GIT_COMMAND_TIMEOUT: TokioDuration = TokioDuration::from_secs(5);