mirror of
https://github.com/openai/codex.git
synced 2026-05-04 05:11:37 +03:00
feat: warning large commits (#6838)
This commit is contained in:
@@ -1,3 +1,4 @@
|
||||
use std::collections::BTreeMap;
|
||||
use std::collections::HashSet;
|
||||
use std::ffi::OsString;
|
||||
use std::fs;
|
||||
@@ -21,6 +22,8 @@ use crate::operations::run_git_for_stdout_all;
|
||||
|
||||
/// Commit message applied to a ghost commit when the caller does not supply one.
const DEFAULT_COMMIT_MESSAGE: &str = "codex snapshot";

/// Minimum number of untracked files grouped under a single directory for that
/// directory to be reported as "large" in a snapshot report.
const LARGE_UNTRACKED_WARNING_THRESHOLD: usize = 200;
|
||||
|
||||
/// Options to control ghost commit creation.
|
||||
pub struct CreateGhostCommitOptions<'a> {
|
||||
@@ -29,6 +32,19 @@ pub struct CreateGhostCommitOptions<'a> {
|
||||
pub force_include: Vec<PathBuf>,
|
||||
}
|
||||
|
||||
/// Summary produced alongside a ghost snapshot.
|
||||
#[derive(Debug, Default, Clone, PartialEq, Eq)]
|
||||
pub struct GhostSnapshotReport {
|
||||
pub large_untracked_dirs: Vec<LargeUntrackedDir>,
|
||||
}
|
||||
|
||||
/// A directory that holds a large number of untracked files.
#[derive(Clone, Debug, Eq, PartialEq)]
pub struct LargeUntrackedDir {
    /// Location of the directory, as supplied to the detection step.
    pub path: PathBuf,
    /// How many untracked files were attributed to this directory.
    pub file_count: usize,
}
|
||||
|
||||
impl<'a> CreateGhostCommitOptions<'a> {
|
||||
/// Creates options scoped to the provided repository path.
|
||||
pub fn new(repo_path: &'a Path) -> Self {
|
||||
@@ -64,10 +80,94 @@ impl<'a> CreateGhostCommitOptions<'a> {
|
||||
}
|
||||
}
|
||||
|
||||
fn detect_large_untracked_dirs(files: &[PathBuf], dirs: &[PathBuf]) -> Vec<LargeUntrackedDir> {
|
||||
let mut counts: BTreeMap<PathBuf, usize> = BTreeMap::new();
|
||||
|
||||
let mut sorted_dirs: Vec<&PathBuf> = dirs.iter().collect();
|
||||
sorted_dirs.sort_by(|a, b| {
|
||||
let a_components = a.components().count();
|
||||
let b_components = b.components().count();
|
||||
b_components.cmp(&a_components).then_with(|| a.cmp(b))
|
||||
});
|
||||
|
||||
for file in files {
|
||||
let mut key: Option<PathBuf> = None;
|
||||
for dir in &sorted_dirs {
|
||||
if file.starts_with(dir.as_path()) {
|
||||
key = Some((*dir).clone());
|
||||
break;
|
||||
}
|
||||
}
|
||||
let key = key.unwrap_or_else(|| {
|
||||
file.parent()
|
||||
.map(PathBuf::from)
|
||||
.unwrap_or_else(|| PathBuf::from("."))
|
||||
});
|
||||
let entry = counts.entry(key).or_insert(0);
|
||||
*entry += 1;
|
||||
}
|
||||
|
||||
let mut result: Vec<LargeUntrackedDir> = counts
|
||||
.into_iter()
|
||||
.filter(|(_, count)| *count >= LARGE_UNTRACKED_WARNING_THRESHOLD)
|
||||
.map(|(path, file_count)| LargeUntrackedDir { path, file_count })
|
||||
.collect();
|
||||
result.sort_by(|a, b| {
|
||||
b.file_count
|
||||
.cmp(&a.file_count)
|
||||
.then_with(|| a.path.cmp(&b.path))
|
||||
});
|
||||
result
|
||||
}
|
||||
|
||||
/// Rewrites `path` relative to `repo_prefix` when that prefix applies.
///
/// If `repo_prefix` is `None`, or `path` does not start with it, the path is
/// returned unchanged (as an owned copy).
fn to_session_relative_path(path: &Path, repo_prefix: Option<&Path>) -> PathBuf {
    repo_prefix
        .and_then(|prefix| path.strip_prefix(prefix).ok())
        .unwrap_or(path)
        .to_path_buf()
}
|
||||
|
||||
/// Create a ghost commit capturing the current state of the repository's working tree.
|
||||
pub fn create_ghost_commit(
|
||||
options: &CreateGhostCommitOptions<'_>,
|
||||
) -> Result<GhostCommit, GitToolingError> {
|
||||
create_ghost_commit_with_report(options).map(|(commit, _)| commit)
|
||||
}
|
||||
|
||||
/// Compute a report describing the working tree for a ghost snapshot without creating a commit.
|
||||
pub fn capture_ghost_snapshot_report(
|
||||
options: &CreateGhostCommitOptions<'_>,
|
||||
) -> Result<GhostSnapshotReport, GitToolingError> {
|
||||
ensure_git_repository(options.repo_path)?;
|
||||
|
||||
let repo_root = resolve_repository_root(options.repo_path)?;
|
||||
let repo_prefix = repo_subdir(repo_root.as_path(), options.repo_path);
|
||||
let existing_untracked =
|
||||
capture_existing_untracked(repo_root.as_path(), repo_prefix.as_deref())?;
|
||||
|
||||
let warning_files = existing_untracked
|
||||
.files
|
||||
.iter()
|
||||
.map(|path| to_session_relative_path(path, repo_prefix.as_deref()))
|
||||
.collect::<Vec<_>>();
|
||||
let warning_dirs = existing_untracked
|
||||
.dirs
|
||||
.iter()
|
||||
.map(|path| to_session_relative_path(path, repo_prefix.as_deref()))
|
||||
.collect::<Vec<_>>();
|
||||
|
||||
Ok(GhostSnapshotReport {
|
||||
large_untracked_dirs: detect_large_untracked_dirs(&warning_files, &warning_dirs),
|
||||
})
|
||||
}
|
||||
|
||||
/// Create a ghost commit capturing the current state of the repository's working tree along with a report.
|
||||
pub fn create_ghost_commit_with_report(
|
||||
options: &CreateGhostCommitOptions<'_>,
|
||||
) -> Result<(GhostCommit, GhostSnapshotReport), GitToolingError> {
|
||||
ensure_git_repository(options.repo_path)?;
|
||||
|
||||
let repo_root = resolve_repository_root(options.repo_path)?;
|
||||
@@ -76,6 +176,18 @@ pub fn create_ghost_commit(
|
||||
let existing_untracked =
|
||||
capture_existing_untracked(repo_root.as_path(), repo_prefix.as_deref())?;
|
||||
|
||||
let warning_files = existing_untracked
|
||||
.files
|
||||
.iter()
|
||||
.map(|path| to_session_relative_path(path, repo_prefix.as_deref()))
|
||||
.collect::<Vec<_>>();
|
||||
let warning_dirs = existing_untracked
|
||||
.dirs
|
||||
.iter()
|
||||
.map(|path| to_session_relative_path(path, repo_prefix.as_deref()))
|
||||
.collect::<Vec<_>>();
|
||||
let large_untracked_dirs = detect_large_untracked_dirs(&warning_files, &warning_dirs);
|
||||
|
||||
let normalized_force = options
|
||||
.force_include
|
||||
.iter()
|
||||
@@ -143,11 +255,18 @@ pub fn create_ghost_commit(
|
||||
Some(commit_env.as_slice()),
|
||||
)?;
|
||||
|
||||
Ok(GhostCommit::new(
|
||||
let ghost_commit = GhostCommit::new(
|
||||
commit_id,
|
||||
parent,
|
||||
existing_untracked.files,
|
||||
existing_untracked.dirs,
|
||||
);
|
||||
|
||||
Ok((
|
||||
ghost_commit,
|
||||
GhostSnapshotReport {
|
||||
large_untracked_dirs,
|
||||
},
|
||||
))
|
||||
}
|
||||
|
||||
@@ -460,6 +579,95 @@ mod tests {
|
||||
Ok(())
|
||||
}
|
||||
|
||||
#[test]
/// Ensures a top-level untracked directory above the threshold shows up in the report.
fn create_snapshot_reports_large_untracked_dirs() -> Result<(), GitToolingError> {
    let temp = tempfile::tempdir()?;
    let repo = temp.path();
    init_test_repo(repo);

    // Give the repository an initial commit so the ghost commit has a parent.
    std::fs::write(repo.join("tracked.txt"), "contents\n")?;
    run_git_in(repo, &["add", "tracked.txt"]);
    let commit_args = [
        "-c",
        "user.name=Tester",
        "-c",
        "user.email=test@example.com",
        "commit",
        "-m",
        "initial",
    ];
    run_git_in(repo, &commit_args);

    // Populate an untracked directory with one file more than the threshold.
    let file_total = LARGE_UNTRACKED_WARNING_THRESHOLD + 1;
    let models_dir = repo.join("models");
    std::fs::create_dir(&models_dir)?;
    for idx in 0..file_total {
        std::fs::write(models_dir.join(format!("weights-{idx}.bin")), "data\n")?;
    }

    let options = CreateGhostCommitOptions::new(repo);
    let (ghost, report) = create_ghost_commit_with_report(&options)?;
    assert!(ghost.parent().is_some());

    let expected = vec![LargeUntrackedDir {
        path: PathBuf::from("models"),
        file_count: file_total,
    }];
    assert_eq!(report.large_untracked_dirs, expected);

    Ok(())
}
|
||||
|
||||
#[test]
/// Ensures a large untracked tree nested below a tracked directory is attributed to the
/// untracked subdirectory rather than to the tracked parent.
fn create_snapshot_reports_nested_large_untracked_dirs_under_tracked_parent()
-> Result<(), GitToolingError> {
    let temp = tempfile::tempdir()?;
    let repo = temp.path();
    init_test_repo(repo);

    // Commit a tracked file inside `src` so that `src` itself is a tracked directory.
    let src_dir = repo.join("src");
    std::fs::create_dir(&src_dir)?;
    std::fs::write(src_dir.join("main.rs"), "fn main() {}\n")?;
    run_git_in(repo, &["add", "src/main.rs"]);
    let commit_args = [
        "-c",
        "user.name=Tester",
        "-c",
        "user.email=test@example.com",
        "commit",
        "-m",
        "initial",
    ];
    run_git_in(repo, &commit_args);

    // Fill an untracked subtree below `src` with one file more than the threshold.
    let file_total = LARGE_UNTRACKED_WARNING_THRESHOLD + 1;
    let cache_dir = src_dir.join("generated").join("cache");
    std::fs::create_dir_all(&cache_dir)?;
    for idx in 0..file_total {
        std::fs::write(cache_dir.join(format!("file-{idx}.bin")), "data\n")?;
    }

    let options = CreateGhostCommitOptions::new(repo);
    let (_, report) = create_ghost_commit_with_report(&options)?;

    assert_eq!(report.large_untracked_dirs.len(), 1);
    let reported = &report.large_untracked_dirs[0];
    assert_ne!(reported.path, PathBuf::from("src"));
    assert!(
        reported.path.starts_with(Path::new("src/generated")),
        "unexpected path for large untracked directory: {}",
        reported.path.display()
    );
    assert_eq!(reported.file_count, file_total);

    Ok(())
}
|
||||
|
||||
#[test]
|
||||
/// Ensures ghost commits succeed in repositories without an existing HEAD.
|
||||
fn create_snapshot_without_existing_head() -> Result<(), GitToolingError> {
|
||||
|
||||
Reference in New Issue
Block a user