remote tasks

This commit is contained in:
easong-openai
2025-09-03 16:57:37 -07:00
parent e83c5f429c
commit d2fcf4314e
51 changed files with 6048 additions and 68 deletions

View File

@@ -0,0 +1,30 @@
# Client crate for Codex cloud tasks: re-exports the shared API types and
# provides an HTTP implementation ("online") plus an in-process mock ("mock").
[package]
name = "codex-cloud-tasks-client"
version = { workspace = true }
edition = "2024"
[lib]
name = "codex_cloud_tasks_client"
path = "src/lib.rs"
[lints]
workspace = true
[features]
# "online" pulls in the real HTTP stack; "mock" has no extra dependencies.
default = ["online"]
online = ["dep:reqwest", "dep:tokio", "dep:codex-backend-client"]
mock = []
[dependencies]
anyhow = "1"
codex-cloud-tasks-api = { path = "../cloud-tasks-api" }
async-trait = "0.1"
chrono = { version = "0.4", features = ["serde"] }
codex-apply-patch = { path = "../apply-patch" }
diffy = "0.4.2"
# Optional: only needed by the "online" feature.
reqwest = { version = "0.12", features = ["json"], optional = true }
serde = { version = "1", features = ["derive"] }
serde_json = "1"
thiserror = "2.0.12"
tokio = { version = "1", features = ["macros", "rt-multi-thread"], optional = true }
codex-backend-client = { path = "../backend-client", optional = true }

View File

@@ -0,0 +1,487 @@
use crate::ApplyOutcome;
use crate::ApplyStatus;
use crate::CloudBackend;
use crate::Error;
use crate::Result;
use crate::TaskId;
use crate::TaskStatus;
use crate::TaskSummary;
use chrono::DateTime;
use chrono::Utc;
use codex_cloud_tasks_api::DiffSummary;
use serde_json::Value;
use std::collections::HashMap;
use codex_backend_client as backend;
use codex_backend_client::CodeTaskDetailsResponseExt;
use codex_backend_client::types::extract_file_paths_list;
/// HTTP-backed implementation of [`CloudBackend`] that delegates requests to
/// `codex_backend_client`.
#[derive(Clone)]
pub struct HttpClient {
    // Public so callers can see the endpoint; also used internally to build
    // task URLs for error messages.
    pub base_url: String,
    backend: backend::Client,
}
impl HttpClient {
    /// Create a client rooted at `base_url`; fails if the underlying backend
    /// client cannot be constructed from that URL.
    pub fn new(base_url: impl Into<String>) -> anyhow::Result<Self> {
        let base_url = base_url.into();
        let backend = backend::Client::new(base_url.clone())?;
        Ok(Self { base_url, backend })
    }
    /// Builder-style: attach a bearer token used for authenticated requests.
    pub fn with_bearer_token(mut self, token: impl Into<String>) -> Self {
        // NOTE(review): the clone looks avoidable since `self` is owned, but it
        // depends on `backend::Client`'s builder consuming self — confirm
        // before removing.
        self.backend = self.backend.clone().with_bearer_token(token);
        self
    }
    /// Builder-style: override the User-Agent sent to the backend.
    pub fn with_user_agent(mut self, ua: impl Into<String>) -> Self {
        self.backend = self.backend.clone().with_user_agent(ua);
        self
    }
    /// Builder-style: set the ChatGPT account id forwarded to the backend.
    pub fn with_chatgpt_account_id(mut self, account_id: impl Into<String>) -> Self {
        self.backend = self.backend.clone().with_chatgpt_account_id(account_id);
        self
    }
}
#[async_trait::async_trait]
impl CloudBackend for HttpClient {
    /// List up to 20 of the current user's tasks, optionally filtered by
    /// environment id.
    async fn list_tasks(&self, env: Option<&str>) -> Result<Vec<TaskSummary>> {
        let resp = self
            .backend
            .list_tasks(Some(20), Some("current"), env)
            .await
            .map_err(|e| Error::Http(format!("list_tasks failed: {e}")))?;
        let tasks: Vec<TaskSummary> = resp
            .items
            .into_iter()
            .map(map_task_list_item_to_summary)
            .collect();
        // Debug log for env filtering visibility
        append_error_log(&format!(
            "http.list_tasks: env={} items={}",
            env.unwrap_or("<all>"),
            tasks.len()
        ));
        Ok(tasks)
    }
    /// Fetch the unified diff for a task. Returns `Error::NoDiffYet` when the
    /// task has not produced a diff (pending or non-diff task).
    async fn get_task_diff(&self, _id: TaskId) -> Result<String> {
        let id = _id.0;
        let (details, body, ct) = self
            .backend
            .get_task_details_with_body(&id)
            .await
            .map_err(|e| Error::Http(format!("get_task_details failed: {e}")))?;
        if let Some(diff) = details.unified_diff() {
            return Ok(diff);
        }
        // No diff yet (pending or non-diff task). Return a structured error so UI can render cleanly.
        // Keep a concise body tail in logs if needed by callers.
        let _ = (body, ct); // silence unused if logging is disabled at callsite
        Err(Error::NoDiffYet)
    }
    /// Collect assistant-authored text messages for a task, falling back to
    /// parsing the raw worklog JSON when the structured accessor is empty.
    async fn get_task_messages(&self, _id: TaskId) -> Result<Vec<String>> {
        let id = _id.0;
        let (details, body, ct) = self
            .backend
            .get_task_details_with_body(&id)
            .await
            .map_err(|e| Error::Http(format!("get_task_details failed: {e}")))?;
        let mut msgs = details.assistant_text_messages();
        if msgs.is_empty() {
            // Fallback: some pending tasks expose only worklog messages; parse from raw body.
            if let Ok(full) = serde_json::from_str::<serde_json::Value>(&body) {
                // worklog.messages[*] where author.role == "assistant" → content.parts[*].text
                if let Some(arr) = full
                    .get("current_assistant_turn")
                    .and_then(|v| v.get("worklog"))
                    .and_then(|v| v.get("messages"))
                    .and_then(|v| v.as_array())
                {
                    for m in arr {
                        let is_assistant = m
                            .get("author")
                            .and_then(|a| a.get("role"))
                            .and_then(|r| r.as_str())
                            == Some("assistant");
                        if !is_assistant {
                            continue;
                        }
                        if let Some(parts) = m
                            .get("content")
                            .and_then(|c| c.get("parts"))
                            .and_then(|p| p.as_array())
                        {
                            for p in parts {
                                if let Some(s) = p.as_str() {
                                    // Shape: content { content_type: "text", parts: ["..."] }
                                    if !s.is_empty() {
                                        msgs.push(s.to_string());
                                    }
                                    continue;
                                }
                                if let Some(obj) = p.as_object() {
                                    if obj.get("content_type").and_then(|t| t.as_str())
                                        == Some("text")
                                    {
                                        if let Some(txt) = obj.get("text").and_then(|t| t.as_str())
                                        {
                                            msgs.push(txt.to_string());
                                        }
                                    }
                                }
                            }
                        }
                    }
                }
            }
        }
        if !msgs.is_empty() {
            return Ok(msgs);
        }
        if let Some(err) = details.assistant_error_message() {
            return Ok(vec![format!("Task failed: {err}")]);
        }
        // No assistant messages found; return a debuggable error with context for logging.
        let url = if self.base_url.contains("/backend-api") {
            format!("{}/wham/tasks/{}", self.base_url, id)
        } else {
            format!("{}/api/codex/tasks/{}", self.base_url, id)
        };
        Err(Error::Http(format!(
            "No assistant text messages in response. GET {url}; content-type={ct}; body={body}"
        )))
    }
    /// Fetch the task's diff and apply it to the local working tree via the
    /// centralized git-apply path, normalizing header-less hunk bodies first.
    async fn apply_task(&self, _id: TaskId) -> Result<ApplyOutcome> {
        let id = _id.0;
        // Fetch diff fresh and apply locally via git (unified diffs).
        let details = self
            .backend
            .get_task_details(&id)
            .await
            .map_err(|e| Error::Http(format!("get_task_details failed: {e}")))?;
        let diff = details
            .unified_diff()
            .ok_or_else(|| Error::Msg(format!("No diff available for task {id}")))?;
        let diff = match crate::patch_apply::classify_patch(&diff) {
            crate::patch_apply::PatchKind::HunkOnly => {
                // A bare hunk body has no file headers; synthesize them from the
                // file list reported by the task details.
                let files = extract_file_paths_list(&details);
                if files.len() > 1 {
                    let parts = crate::patch_apply::split_hunk_body_into_files(&diff);
                    if parts.len() == files.len() {
                        let mut acc = String::new();
                        for (i, (oldp, newp)) in files.iter().enumerate() {
                            let u = crate::patch_apply::synthesize_unified_single_file(
                                &parts[i], oldp, newp,
                            );
                            acc.push_str(&u);
                            if !acc.ends_with('\n') {
                                acc.push('\n');
                            }
                        }
                        acc
                    } else if let Some((oldp, newp)) = details.single_file_paths() {
                        crate::patch_apply::synthesize_unified_single_file(&diff, &oldp, &newp)
                    } else {
                        diff
                    }
                } else if let Some((oldp, newp)) = details.single_file_paths() {
                    crate::patch_apply::synthesize_unified_single_file(&diff, &oldp, &newp)
                } else {
                    diff
                }
            }
            _ => diff,
        };
        // Run the centralized Git apply path (supports unified diffs and Codex conversion)
        let ctx = crate::patch_apply::context_from_env(
            std::env::current_dir().unwrap_or_else(|_| std::env::temp_dir()),
        );
        let res = crate::patch_apply::apply_patch(&diff, &ctx);
        let status = match res.status {
            crate::patch_apply::ApplyStatus::Success => ApplyStatus::Success,
            crate::patch_apply::ApplyStatus::Partial => ApplyStatus::Partial,
            crate::patch_apply::ApplyStatus::Error => ApplyStatus::Error,
        };
        let applied = matches!(status, ApplyStatus::Success);
        let message = match status {
            ApplyStatus::Success => format!(
                "Applied task {id} locally ({} changed)",
                res.changed_paths.len()
            ),
            ApplyStatus::Partial => format!(
                "Apply partially succeeded for task {id} (changed={}, skipped={}, conflicts={})",
                res.changed_paths.len(),
                res.skipped_paths.len(),
                res.conflict_paths.len()
            ),
            ApplyStatus::Error => {
                let is_check = res.diagnostics.contains("apply --check failed");
                if is_check {
                    format!(
                        "Apply check failed for task {id}: patch does not apply to your working tree. No changes were made. See error.log for details.",
                    )
                } else {
                    // Compact, single-line fallback; avoid embedding multiline stderr directly.
                    let mut diag = res.diagnostics.replace('\n', " ");
                    if diag.len() > 600 {
                        // Back up to a UTF-8 char boundary before truncating:
                        // `String::truncate` panics if the cut lands inside a
                        // multi-byte character (git diagnostics can contain
                        // non-ASCII paths).
                        let mut cut = 600;
                        while !diag.is_char_boundary(cut) {
                            cut -= 1;
                        }
                        diag.truncate(cut);
                    }
                    format!(
                        "Apply failed for task {id} (changed={}, skipped={}, conflicts={}); {}",
                        res.changed_paths.len(),
                        res.skipped_paths.len(),
                        res.conflict_paths.len(),
                        diag
                    )
                }
            }
        };
        // On apply failure, log a detailed record including the diff we attempted.
        if matches!(status, ApplyStatus::Error) {
            let mut log = String::new();
            let summary = summarize_patch_for_logging(&diff);
            use std::fmt::Write as _;
            let _ = writeln!(
                &mut log,
                "apply_error: id={} changed={} skipped={} conflicts={}; {}",
                id,
                res.changed_paths.len(),
                res.skipped_paths.len(),
                res.conflict_paths.len(),
                res.diagnostics
            );
            let _ = writeln!(&mut log, "{summary}");
            let _ = writeln!(&mut log, "----- PATCH BEGIN -----");
            let _ = writeln!(&mut log, "{diff}");
            let _ = writeln!(&mut log, "----- PATCH END -----");
            append_error_log(&log);
        }
        Ok(ApplyOutcome {
            applied,
            status,
            message,
            skipped_paths: res.skipped_paths,
            conflict_paths: res.conflict_paths,
        })
    }
    /// Create a new cloud task from a prompt, optionally seeding it with a
    /// starting diff taken from the `CODEX_STARTING_DIFF` environment variable.
    async fn create_task(
        &self,
        env_id: &str,
        prompt: &str,
        git_ref: &str,
        qa_mode: bool,
    ) -> Result<codex_cloud_tasks_api::CreatedTask> {
        // Build request payload patterned after VSCode/newtask.rs
        let mut input_items: Vec<serde_json::Value> = Vec::new();
        input_items.push(serde_json::json!({
            "type": "message",
            "role": "user",
            "content": [{ "content_type": "text", "text": prompt }]
        }));
        if let Ok(diff) = std::env::var("CODEX_STARTING_DIFF") {
            if !diff.is_empty() {
                input_items.push(serde_json::json!({
                    "type": "pre_apply_patch",
                    "output_diff": { "diff": diff }
                }));
            }
        }
        let request_body = serde_json::json!({
            "new_task": {
                "environment_id": env_id,
                "branch": git_ref,
                "run_environment_in_qa_mode": qa_mode,
            },
            "input_items": input_items,
        });
        // Use the underlying backend client to post with proper headers
        match self.backend.create_task(request_body).await {
            Ok(id) => {
                append_error_log(&format!(
                    "new_task: created id={id} env={} prompt_chars={}",
                    env_id,
                    prompt.chars().count()
                ));
                Ok(codex_cloud_tasks_api::CreatedTask { id: TaskId(id) })
            }
            Err(e) => {
                append_error_log(&format!(
                    "new_task: create failed env={} prompt_chars={}: {}",
                    env_id,
                    prompt.chars().count(),
                    e
                ));
                Err(Error::Http(format!("create_task failed: {e}")))
            }
        }
    }
}
/// Convert a backend task-list item into the crate-level [`TaskSummary`],
/// best-effort extracting environment label and diff stats from the loosely
/// typed `task_status_display` map.
fn map_task_list_item_to_summary(src: backend::TaskListItem) -> TaskSummary {
    // Pull a human-readable environment label out of the status-display map.
    // The value may be a plain string or a rich object; empty strings count
    // as "no label".
    fn env_label_from_status_display(v: Option<&HashMap<String, Value>>) -> Option<String> {
        let obj = v?;
        let raw = obj.get("environment_label")?;
        if let Some(s) = raw.as_str() {
            if s.trim().is_empty() {
                return None;
            }
            return Some(s.to_string());
        }
        if let Some(o) = raw.as_object() {
            // Best-effort support for rich shapes: { text: "..." } or { plain_text: "..." }
            if let Some(s) = o.get("text").and_then(Value::as_str) {
                if !s.trim().is_empty() {
                    return Some(s.to_string());
                }
            }
            if let Some(s) = o.get("plain_text").and_then(Value::as_str) {
                if !s.trim().is_empty() {
                    return Some(s.to_string());
                }
            }
            // Fallback: compact JSON for debugging
            if let Ok(s) = serde_json::to_string(o) {
                if !s.is_empty() {
                    return Some(s);
                }
            }
        }
        None
    }
    // Best-effort parse of diff_stats (when present in latest_turn_status_display)
    // Missing or malformed fields leave the corresponding counter at its
    // default of zero; negative values are clamped to zero.
    fn diff_summary_from_status_display(v: Option<&HashMap<String, Value>>) -> DiffSummary {
        let mut out = DiffSummary::default();
        let Some(map) = v else { return out };
        let latest = map
            .get("latest_turn_status_display")
            .and_then(Value::as_object);
        let Some(latest) = latest else { return out };
        if let Some(ds) = latest.get("diff_stats").and_then(Value::as_object) {
            if let Some(n) = ds.get("files_modified").and_then(Value::as_i64) {
                out.files_changed = n.max(0) as usize;
            }
            if let Some(n) = ds.get("lines_added").and_then(Value::as_i64) {
                out.lines_added = n.max(0) as usize;
            }
            if let Some(n) = ds.get("lines_removed").and_then(Value::as_i64) {
                out.lines_removed = n.max(0) as usize;
            }
        }
        out
    }
    TaskSummary {
        id: TaskId(src.id),
        title: src.title,
        status: map_status(src.task_status_display.as_ref()),
        updated_at: parse_updated_at(src.updated_at.as_ref()),
        // The list endpoint does not carry an environment id here.
        environment_id: None,
        environment_label: env_label_from_status_display(src.task_status_display.as_ref()),
        summary: diff_summary_from_status_display(src.task_status_display.as_ref()),
    }
}
fn map_status(v: Option<&HashMap<String, Value>>) -> TaskStatus {
if let Some(val) = v {
// Prefer nested latest_turn_status_display.turn_status when present.
if let Some(turn) = val
.get("latest_turn_status_display")
.and_then(Value::as_object)
{
if let Some(s) = turn.get("turn_status").and_then(Value::as_str) {
return match s {
"failed" => TaskStatus::Error,
"completed" => TaskStatus::Ready,
"in_progress" => TaskStatus::Pending,
"pending" => TaskStatus::Pending,
"cancelled" => TaskStatus::Error,
_ => TaskStatus::Pending,
};
}
}
// Legacy or alternative flat state.
if let Some(state) = val.get("state").and_then(Value::as_str) {
return match state {
"pending" => TaskStatus::Pending,
"ready" => TaskStatus::Ready,
"applied" => TaskStatus::Applied,
"error" => TaskStatus::Error,
_ => TaskStatus::Pending,
};
}
}
TaskStatus::Pending
}
/// Interpret an optional fractional-seconds-since-epoch timestamp, falling
/// back to "now" when absent. Negative timestamps clamp to the epoch.
fn parse_updated_at(ts: Option<&f64>) -> DateTime<Utc> {
    let Some(&raw) = ts else {
        return Utc::now();
    };
    // Value is seconds since epoch with fractional part.
    let secs = raw as i64;
    let nanos = ((raw - secs as f64) * 1_000_000_000.0) as u32;
    let when = std::time::UNIX_EPOCH + std::time::Duration::new(secs.max(0) as u64, nanos);
    DateTime::<Utc>::from(when)
}
/// Return a compact one-line classification of the patch plus a short head snippet
/// to aid debugging when apply fails.
fn summarize_patch_for_logging(patch: &str) -> String {
    let trimmed = patch.trim_start();
    let kind = if trimmed.starts_with("*** Begin Patch") {
        "codex-patch"
    } else if trimmed.starts_with("diff --git ") || trimmed.contains("\n*** End Patch\n") {
        // In some cases providers nest a codex patch inside another format; detect both.
        "git-diff"
    } else if trimmed.starts_with("@@ ") || trimmed.contains("\n@@ ") {
        "unified-diff"
    } else {
        "unknown"
    };
    let lines = patch.lines().count();
    // Note: `len()` is the byte length, not the char count.
    let chars = patch.len();
    let cwd = std::env::current_dir()
        .ok()
        .map(|p| p.display().to_string())
        .unwrap_or_else(|| "<unknown>".to_string());
    // Grab the first up-to-20 lines for context.
    let head: String = patch.lines().take(20).collect::<Vec<&str>>().join("\n");
    // Make sure we don't explode logs with huge content. Back the cut up to a
    // UTF-8 char boundary: `&head[..800]` would panic if byte 800 landed
    // inside a multi-byte character.
    let head_trunc = if head.len() > 800 {
        let mut cut = 800;
        while !head.is_char_boundary(cut) {
            cut -= 1;
        }
        head[..cut].to_string()
    } else {
        head
    };
    format!(
        "patch_summary: kind={kind} lines={lines} chars={chars} cwd={cwd} ; head=\n{head_trunc}"
    )
}
/// Append a timestamped line to `error.log` in the current directory.
/// Failures are deliberately swallowed: logging must never break the caller.
fn append_error_log(message: &str) {
    use std::io::Write as _;
    let stamp = Utc::now().to_rfc3339();
    let file = std::fs::OpenOptions::new()
        .create(true)
        .append(true)
        .open("error.log");
    if let Ok(mut f) = file {
        let _ = writeln!(f, "[{stamp}] {message}");
    }
}

View File

@@ -0,0 +1,26 @@
// This crate surfaces all failures through `Result`; forbid panicking escapes.
#![deny(clippy::unwrap_used, clippy::expect_used)]
// Re-export the shared API surface so consumers only depend on this crate.
pub use api::ApplyOutcome;
pub use api::ApplyStatus;
pub use api::CloudBackend;
pub use api::Error;
pub use api::Result;
pub use api::TaskId;
pub use api::TaskStatus;
pub use api::TaskSummary;
use codex_cloud_tasks_api as api;
// Backends are feature-gated: "mock" for tests, "online" for the real service.
#[cfg(feature = "mock")]
mod mock;
#[cfg(feature = "online")]
mod http;
#[cfg(feature = "mock")]
pub use mock::MockClient;
#[cfg(feature = "online")]
pub use http::HttpClient;
// Reusable apply engine (git apply runner and helpers)
pub mod patch_apply;

View File

@@ -0,0 +1,132 @@
use crate::ApplyOutcome;
use crate::CloudBackend;
use crate::Error;
use crate::Result;
use crate::TaskId;
use crate::TaskStatus;
use crate::TaskSummary;
use chrono::Utc;
use codex_cloud_tasks_api::DiffSummary;
/// Deterministic in-process [`CloudBackend`] double that serves canned tasks
/// and diffs (feature `mock`).
#[derive(Clone, Default)]
pub struct MockClient;
#[async_trait::async_trait]
impl CloudBackend for MockClient {
async fn list_tasks(&self, _env: Option<&str>) -> Result<Vec<TaskSummary>> {
// Slightly vary content by env to aid tests that rely on the mock
let rows = match _env {
Some("env-A") => vec![("T-2000", "A: First", TaskStatus::Ready)],
Some("env-B") => vec![
("T-3000", "B: One", TaskStatus::Ready),
("T-3001", "B: Two", TaskStatus::Pending),
],
_ => vec![
("T-1000", "Update README formatting", TaskStatus::Ready),
("T-1001", "Fix clippy warnings in core", TaskStatus::Pending),
("T-1002", "Add contributing guide", TaskStatus::Ready),
],
};
let environment_id = _env.map(|s| s.to_string());
let environment_label = match _env {
Some("env-A") => Some("Env A".to_string()),
Some("env-B") => Some("Env B".to_string()),
Some(other) => Some(format!("{other}")),
None => Some("Global".to_string()),
};
let mut out = Vec::new();
for (id_str, title, status) in rows {
let id = TaskId(id_str.to_string());
let diff = mock_diff_for(&id);
let (a, d) = count_from_unified(&diff);
out.push(TaskSummary {
id,
title: title.to_string(),
status,
updated_at: Utc::now(),
environment_id: environment_id.clone(),
environment_label: environment_label.clone(),
summary: DiffSummary {
files_changed: 1,
lines_added: a,
lines_removed: d,
},
});
}
Ok(out)
}
async fn get_task_diff(&self, id: TaskId) -> Result<String> {
Ok(mock_diff_for(&id))
}
async fn get_task_messages(&self, _id: TaskId) -> Result<Vec<String>> {
Ok(vec![
"Mock assistant output: this task contains no diff.".to_string(),
])
}
async fn apply_task(&self, id: TaskId) -> Result<ApplyOutcome> {
Ok(ApplyOutcome {
applied: true,
status: crate::ApplyStatus::Success,
message: format!("Applied task {} locally (mock)", id.0),
skipped_paths: Vec::new(),
conflict_paths: Vec::new(),
})
}
async fn create_task(
&self,
env_id: &str,
prompt: &str,
git_ref: &str,
qa_mode: bool,
) -> Result<codex_cloud_tasks_api::CreatedTask> {
let _ = (env_id, prompt, git_ref, qa_mode);
let id = format!("task_local_{}", chrono::Utc::now().timestamp_millis());
Ok(codex_cloud_tasks_api::CreatedTask { id: TaskId(id) })
}
}
/// Canned unified diffs keyed by task id; unknown ids get the CONTRIBUTING
/// fallback diff.
fn mock_diff_for(id: &TaskId) -> String {
    let raw = match id.0.as_str() {
        "T-1000" => "diff --git a/README.md b/README.md\nindex 000000..111111 100644\n--- a/README.md\n+++ b/README.md\n@@ -1,2 +1,3 @@\n Intro\n-Hello\n+Hello, world!\n+Task: T-1000\n",
        "T-1001" => "diff --git a/core/src/lib.rs b/core/src/lib.rs\nindex 000000..111111 100644\n--- a/core/src/lib.rs\n+++ b/core/src/lib.rs\n@@ -1,2 +1,1 @@\n-use foo;\n use bar;\n",
        _ => "diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md\nindex 000000..111111 100644\n--- /dev/null\n+++ b/CONTRIBUTING.md\n@@ -0,0 +1,3 @@\n+## Contributing\n+Please open PRs.\n+Thanks!\n",
    };
    raw.to_string()
}
/// Count (added, removed) lines in a unified diff. Prefers a structured parse
/// via `diffy`; falls back to a simple line scan for inputs diffy rejects.
fn count_from_unified(diff: &str) -> (usize, usize) {
    match diffy::Patch::from_str(diff) {
        Ok(patch) => {
            let mut added = 0usize;
            let mut removed = 0usize;
            for line in patch.hunks().iter().flat_map(|h| h.lines()) {
                match line {
                    diffy::Line::Insert(_) => added += 1,
                    diffy::Line::Delete(_) => removed += 1,
                    _ => {}
                }
            }
            (added, removed)
        }
        Err(_) => {
            // Header lines ("+++", "---", "@@") must not be counted as changes.
            diff.lines()
                .filter(|l| {
                    !(l.starts_with("+++") || l.starts_with("---") || l.starts_with("@@"))
                })
                .fold((0, 0), |(a, d), l| match l.as_bytes().first() {
                    Some(b'+') => (a + 1, d),
                    Some(b'-') => (a, d + 1),
                    _ => (a, d),
                })
        }
    }
}

View File

@@ -0,0 +1,607 @@
#![allow(dead_code)]
use std::env;
use std::path::Path;
use std::path::PathBuf;
/// Patch classification used to choose normalization steps before applying.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum PatchKind {
    /// Codex Patch format beginning with `*** Begin Patch`.
    CodexPatch,
    /// Unified diff that includes either `diff --git` headers or just `---/+++` file headers.
    GitUnified,
    /// Body contains `@@` hunks but lacks required file headers.
    HunkOnly,
    /// Unknown/unsupported format.
    Unknown,
}
/// How to handle whitespace in `git apply`.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum WhitespaceMode {
    /// Default strict behavior.
    Strict,
    /// Equivalent to `--ignore-space-change`.
    IgnoreSpaceChange,
    /// Equivalent to `--whitespace=nowarn`.
    WhitespaceNowarn,
}
/// How to treat CRLF conversions in `git`.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum CrlfMode {
    /// Use repo/user defaults.
    Default,
    /// Apply with `-c core.autocrlf=false -c core.safecrlf=false`.
    NoAutoCrlfNoSafe,
}
/// Context for an apply operation.
#[derive(Debug, Clone)]
pub struct ApplyContext {
    /// Directory the git commands run from (need not be the repo root).
    pub cwd: PathBuf,
    pub whitespace: WhitespaceMode,
    pub crlf_mode: CrlfMode,
}
/// High-level outcome of an apply attempt.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum ApplyStatus {
    Success,
    Partial,
    Error,
}
/// Structured result produced by the apply runner.
#[derive(Debug, Clone)]
pub struct ApplyResult {
    pub status: ApplyStatus,
    /// Paths newly modified by this apply (delta of `git diff --name-only`).
    pub changed_paths: Vec<String>,
    pub skipped_paths: Vec<String>,
    pub conflict_paths: Vec<String>,
    /// Trailing portions of the `git apply` output, for diagnostics.
    pub stdout_tail: String,
    pub stderr_tail: String,
    pub diagnostics: String,
}
/// Classify an incoming patch string by format.
pub fn classify_patch(s: &str) -> PatchKind {
    let t = s.trim_start();
    if t.starts_with("*** Begin Patch") {
        return PatchKind::CodexPatch;
    }
    // Unified diffs can be either full git style or just `---`/`+++` file headers.
    let has_diff_git = t.contains("\ndiff --git ") || t.starts_with("diff --git ");
    // The `---` header may be the very first line of the patch, in which case
    // there is no preceding newline to match on.
    let has_dash_headers =
        (t.starts_with("--- ") || t.contains("\n--- ")) && t.contains("\n+++ ");
    let has_hunk = t.contains("\n@@ ") || t.starts_with("@@ ");
    if has_diff_git || (has_dash_headers && has_hunk) {
        return PatchKind::GitUnified;
    }
    if has_hunk {
        return PatchKind::HunkOnly;
    }
    PatchKind::Unknown
}
/// Build an `ApplyContext` from environment variables.
///
/// Supported envs:
/// - `CODEX_APPLY_WHITESPACE` = `ignore-space-change` | `whitespace-nowarn` | `strict` (default)
/// - `CODEX_APPLY_CRLF` = `no-autocrlf-nosafe` | `default` (default)
pub fn context_from_env(cwd: PathBuf) -> ApplyContext {
    // Unset or unrecognized values silently fall back to the defaults.
    let whitespace = env::var("CODEX_APPLY_WHITESPACE")
        .ok()
        .as_deref()
        .map(|v| match v {
            "ignore-space-change" => WhitespaceMode::IgnoreSpaceChange,
            "whitespace-nowarn" => WhitespaceMode::WhitespaceNowarn,
            _ => WhitespaceMode::Strict,
        })
        .unwrap_or(WhitespaceMode::Strict);
    let crlf_mode = env::var("CODEX_APPLY_CRLF")
        .ok()
        .filter(|v| v == "no-autocrlf-nosafe")
        .map(|_| CrlfMode::NoAutoCrlfNoSafe)
        .unwrap_or(CrlfMode::Default);
    ApplyContext {
        cwd,
        whitespace,
        crlf_mode,
    }
}
/// Main entry point for applying a patch: classify the input, normalize it to
/// a unified diff, and hand it to the git apply runner.
pub fn apply_patch(patch: &str, ctx: &ApplyContext) -> ApplyResult {
    // Early-exit failures share one shape: nothing ran, only diagnostics.
    let fail = |diagnostics: String| ApplyResult {
        status: ApplyStatus::Error,
        changed_paths: Vec::new(),
        skipped_paths: Vec::new(),
        conflict_paths: Vec::new(),
        stdout_tail: String::new(),
        stderr_tail: String::new(),
        diagnostics,
    };
    let kind = classify_patch(patch);
    let unified = match kind {
        PatchKind::GitUnified => patch.to_string(),
        PatchKind::CodexPatch => match convert_codex_patch_to_unified(patch, &ctx.cwd) {
            Ok(u) => u,
            Err(e) => {
                return fail(format!("failed to convert codex patch to unified diff: {e}"));
            }
        },
        PatchKind::HunkOnly | PatchKind::Unknown => {
            return fail(format!(
                "unsupported patch format: {kind:?}; need unified diff with file headers"
            ));
        }
    };
    apply_unified(&unified, ctx)
}
/// Apply a unified diff via `git apply --3way`, preflighting with
/// `git apply --check` so the working tree is only touched when the patch is
/// expected to apply. Returns a structured [`ApplyResult`]; this function
/// never panics on git failures, it reports them in `diagnostics`.
fn apply_unified(unified_patch: &str, ctx: &ApplyContext) -> ApplyResult {
    // 1) Ensure `git` exists
    if let Err(e) = run_git(&ctx.cwd, &[], &["--version"]) {
        return ApplyResult {
            status: ApplyStatus::Error,
            changed_paths: Vec::new(),
            skipped_paths: Vec::new(),
            conflict_paths: Vec::new(),
            stdout_tail: String::new(),
            stderr_tail: String::new(),
            diagnostics: format!("git not available: {e}"),
        };
    }
    // 2) Determine repo root
    let repo_root = match run_git_capture(&ctx.cwd, &[], &["rev-parse", "--show-toplevel"]) {
        Ok(out) if out.status == 0 => out.stdout.trim().to_string(),
        Ok(out) => {
            return ApplyResult {
                status: ApplyStatus::Error,
                changed_paths: Vec::new(),
                skipped_paths: Vec::new(),
                conflict_paths: Vec::new(),
                stdout_tail: String::new(),
                stderr_tail: String::new(),
                diagnostics: format!(
                    "not a git repository (exit {}): {}",
                    out.status,
                    tail(&out.stderr)
                ),
            };
        }
        Err(e) => {
            return ApplyResult {
                status: ApplyStatus::Error,
                changed_paths: Vec::new(),
                skipped_paths: Vec::new(),
                conflict_paths: Vec::new(),
                stdout_tail: String::new(),
                stderr_tail: String::new(),
                diagnostics: format!("git rev-parse failed: {e}"),
            };
        }
    };
    // 3) Temp file
    // The patch is written to a pid-suffixed temp file so `git apply` can read
    // it by path; the guard below removes it on every exit path.
    let mut patch_path = std::env::temp_dir();
    patch_path.push(format!("codex-apply-{}.diff", std::process::id()));
    if let Err(e) = std::fs::write(&patch_path, unified_patch) {
        return ApplyResult {
            status: ApplyStatus::Error,
            changed_paths: Vec::new(),
            skipped_paths: Vec::new(),
            conflict_paths: Vec::new(),
            stdout_tail: String::new(),
            stderr_tail: String::new(),
            diagnostics: format!("failed to write temp patch: {e}"),
        };
    }
    // RAII cleanup of the temp patch file.
    struct TempPatch(PathBuf);
    impl Drop for TempPatch {
        fn drop(&mut self) {
            let _ = std::fs::remove_file(&self.0);
        }
    }
    let _guard = TempPatch(patch_path.clone());
    // 4) Preflight --check
    let mut preflight_args: Vec<&str> = vec!["apply", "--check"];
    push_whitespace_flags(&mut preflight_args, ctx.whitespace);
    // Compute a shell-friendly representation of the preflight command for logging.
    let preflight_cfg = crlf_cfg(ctx.crlf_mode);
    let preflight_cmd = render_command_for_log(
        &repo_root,
        &preflight_cfg,
        &prepend(&preflight_args, patch_path.to_string_lossy().as_ref()),
    );
    let preflight = run_git_capture(
        Path::new(&repo_root),
        preflight_cfg.as_slice(),
        &prepend(&preflight_args, patch_path.to_string_lossy().as_ref()),
    );
    if let Ok(out) = &preflight {
        if out.status != 0 {
            // The "apply --check failed" phrase is matched by callers to render
            // a friendlier message — keep it stable.
            return ApplyResult {
                status: ApplyStatus::Error,
                changed_paths: Vec::new(),
                skipped_paths: Vec::new(),
                conflict_paths: Vec::new(),
                stdout_tail: tail(&out.stdout),
                stderr_tail: tail(&out.stderr),
                diagnostics: format!(
                    "git apply --check failed; working tree not modified; cmd: {preflight_cmd}"
                ),
            };
        }
    } else if let Err(e) = preflight {
        return ApplyResult {
            status: ApplyStatus::Error,
            changed_paths: Vec::new(),
            skipped_paths: Vec::new(),
            conflict_paths: Vec::new(),
            stdout_tail: String::new(),
            stderr_tail: String::new(),
            diagnostics: format!("git apply --check failed to run: {e}; cmd: {preflight_cmd}"),
        };
    }
    // 5) Snapshot before
    // Used to compute the delta of modified paths attributable to this apply.
    let before = list_changed_paths(&repo_root);
    // 6) Apply
    let mut apply_args: Vec<&str> = vec!["apply", "--3way"];
    push_whitespace_flags(&mut apply_args, ctx.whitespace);
    let apply_cfg = crlf_cfg(ctx.crlf_mode);
    let apply_cmd = render_command_for_log(
        &repo_root,
        &apply_cfg,
        &prepend(&apply_args, patch_path.to_string_lossy().as_ref()),
    );
    let apply_out = run_git_capture(
        Path::new(&repo_root),
        apply_cfg.as_slice(),
        &prepend(&apply_args, patch_path.to_string_lossy().as_ref()),
    );
    let mut result = ApplyResult {
        status: ApplyStatus::Error,
        changed_paths: Vec::new(),
        skipped_paths: Vec::new(),
        conflict_paths: Vec::new(),
        stdout_tail: String::new(),
        stderr_tail: String::new(),
        diagnostics: String::new(),
    };
    match apply_out {
        Ok(out) => {
            result.stdout_tail = tail(&out.stdout);
            result.stderr_tail = tail(&out.stderr);
            result.conflict_paths = list_conflicts(&repo_root);
            // NOTE: skipped paths are parsed from the (possibly truncated)
            // output tails, so paths mentioned only earlier in very long
            // output can be missed — best effort.
            let mut skipped = parse_skipped_paths(&result.stdout_tail);
            skipped.extend(parse_skipped_paths(&result.stderr_tail));
            skipped.sort();
            skipped.dedup();
            result.skipped_paths = skipped;
            let after = list_changed_paths(&repo_root);
            result.changed_paths = set_delta(&before, &after);
            // Non-zero exit with some changes or conflicts counts as Partial
            // (a --3way apply can land a subset of hunks).
            result.status = if out.status == 0 {
                ApplyStatus::Success
            } else if !result.changed_paths.is_empty() || !result.conflict_paths.is_empty() {
                ApplyStatus::Partial
            } else {
                ApplyStatus::Error
            };
            result.diagnostics = format!(
                "git apply exit={} ({} changed, {} skipped, {} conflicts); cmd: {}",
                out.status,
                result.changed_paths.len(),
                result.skipped_paths.len(),
                result.conflict_paths.len(),
                apply_cmd
            );
        }
        Err(e) => {
            result.status = ApplyStatus::Error;
            result.diagnostics = format!("failed to run git apply: {e}; cmd: {apply_cmd}");
        }
    }
    result
}
/// Render a git invocation as a copy-pasteable shell line for diagnostics,
/// e.g. `(cd /repo && git -c 'core.autocrlf=false' apply ...)`.
fn render_command_for_log(cwd: &str, git_cfg: &[&str], args: &[&str]) -> String {
    // Single-quote anything outside a conservative safe-character set,
    // escaping embedded quotes POSIX-style.
    fn quote(s: &str) -> String {
        let needs_quoting = !s
            .chars()
            .all(|c| c.is_ascii_alphanumeric() || "-_.:/@%+".contains(c));
        if needs_quoting {
            format!("'{}'", s.replace('\'', "'\\''"))
        } else {
            s.to_string()
        }
    }
    let words: Vec<String> = std::iter::once("git".to_string())
        .chain(git_cfg.iter().map(|a| quote(a)))
        .chain(args.iter().map(|a| quote(a)))
        .collect();
    format!("(cd {} && {})", quote(cwd), words.join(" "))
}
/// Convert a Codex `*** Begin Patch` payload into a git unified diff by
/// re-using the apply-patch parser, emitting one `diff --git` section per
/// file change. Paths are made relative to `cwd` when possible.
fn convert_codex_patch_to_unified(patch: &str, cwd: &Path) -> Result<String, String> {
    // Parse codex patch and verify paths relative to cwd
    let argv = vec!["apply_patch".to_string(), patch.to_string()];
    let verified = codex_apply_patch::maybe_parse_apply_patch_verified(&argv, cwd);
    match verified {
        codex_apply_patch::MaybeApplyPatchVerified::Body(action) => {
            let mut parts: Vec<String> = Vec::new();
            for (abs_path, change) in action.changes() {
                // Fall back to the absolute path when it is not under cwd.
                let rel_path = abs_path.strip_prefix(cwd).unwrap_or(abs_path);
                let rel_str = rel_path.to_string_lossy();
                match change {
                    codex_apply_patch::ApplyPatchFileChange::Add { content } => {
                        // New file: /dev/null on the old side plus one add-hunk.
                        let header = format!(
                            "diff --git a/{rel_str} b/{rel_str}
new file mode 100644
--- /dev/null
+++ b/{rel_str}
"
                        );
                        let body = build_add_hunk(content);
                        parts.push(format!("{header}{body}"));
                    }
                    codex_apply_patch::ApplyPatchFileChange::Delete { .. } => {
                        // Deletion: header only; git infers removal of content.
                        let header = format!(
                            "diff --git a/{rel_str} b/{rel_str}
deleted file mode 100644
--- a/{rel_str}
+++ /dev/null
"
                        );
                        parts.push(header);
                    }
                    codex_apply_patch::ApplyPatchFileChange::Update {
                        unified_diff,
                        move_path,
                        ..
                    } => {
                        // A move changes the "b/" side of the header.
                        let new_rel = move_path
                            .as_ref()
                            .map(|p| {
                                p.strip_prefix(cwd)
                                    .unwrap_or(p)
                                    .to_string_lossy()
                                    .to_string()
                            })
                            .unwrap_or_else(|| rel_str.to_string());
                        let header = format!(
                            "diff --git a/{rel_str} b/{new_rel}
--- a/{rel_str}
+++ b/{new_rel}
"
                        );
                        parts.push(format!("{header}{unified_diff}"));
                    }
                }
            }
            if parts.is_empty() {
                Err("empty patch after conversion".to_string())
            } else {
                Ok(parts.join("\n"))
            }
        }
        codex_apply_patch::MaybeApplyPatchVerified::CorrectnessError(e) => {
            Err(format!("patch correctness: {e}"))
        }
        codex_apply_patch::MaybeApplyPatchVerified::ShellParseError(e) => {
            Err(format!("shell parse: {e:?}"))
        }
        _ => Err("not an apply_patch payload".to_string()),
    }
}
/// Build the single `@@ -0,0 +1,N @@` hunk for a newly added file from its
/// content. CRLF is normalized to LF; empty content yields an empty string.
fn build_add_hunk(content: &str) -> String {
    let normalized = content.replace("\r\n", "\n");
    let mut segments: Vec<&str> = normalized.split('\n').collect();
    // A trailing newline produces one empty trailing segment; drop it so the
    // line count matches what git expects.
    if segments.last() == Some(&"") {
        segments.pop();
    }
    if segments.is_empty() {
        return String::new();
    }
    let mut hunk = format!("@@ -0,0 +1,{} @@\n", segments.len());
    for seg in &segments {
        hunk.push('+');
        hunk.push_str(seg);
        hunk.push('\n');
    }
    hunk
}
/// Append the `git apply` flags corresponding to `mode`; strict mode adds none.
fn push_whitespace_flags(args: &mut Vec<&str>, mode: WhitespaceMode) {
    match mode {
        WhitespaceMode::IgnoreSpaceChange => args.push("--ignore-space-change"),
        WhitespaceMode::WhitespaceNowarn => args.extend(["--whitespace", "nowarn"]),
        WhitespaceMode::Strict => {}
    }
}
/// Global `git -c` settings for the requested CRLF handling; empty for the
/// repo/user defaults.
fn crlf_cfg(mode: CrlfMode) -> Vec<&'static str> {
    if let CrlfMode::NoAutoCrlfNoSafe = mode {
        vec!["-c", "core.autocrlf=false", "-c", "core.safecrlf=false"]
    } else {
        Vec::new()
    }
}
/// Return `base` with `tail` appended as a new vector (callers use this to
/// add the patch-file path to a prepared argument list).
fn prepend<'a>(base: &'a [&'a str], tail: &'a str) -> Vec<&'a str> {
    base.iter().copied().chain(std::iter::once(tail)).collect()
}
/// Captured result of a finished `git` invocation.
struct GitOutput {
    // Exit code; -1 when the process terminated without one (e.g. by signal).
    status: i32,
    stdout: String,
    stderr: String,
}
/// Run `git <git_cfg> <args>` in `cwd`, discarding output. Errors on spawn
/// failure or a non-zero exit status.
fn run_git(cwd: &std::path::Path, git_cfg: &[&str], args: &[&str]) -> std::io::Result<()> {
    let status = std::process::Command::new("git")
        .args(git_cfg)
        .args(args)
        .current_dir(cwd)
        .status()?;
    if status.success() {
        Ok(())
    } else {
        Err(std::io::Error::other(format!(
            "git {:?} exited {}",
            args,
            status.code().unwrap_or(-1)
        )))
    }
}
/// Run `git <git_cfg> <args>` in `cwd`, capturing stdout/stderr (lossily
/// decoded as UTF-8). Only spawn failures are errors; a non-zero exit is
/// reported through `GitOutput::status`.
fn run_git_capture(
    cwd: &std::path::Path,
    git_cfg: &[&str],
    args: &[&str],
) -> std::io::Result<GitOutput> {
    let out = std::process::Command::new("git")
        .args(git_cfg)
        .args(args)
        .current_dir(cwd)
        .output()?;
    Ok(GitOutput {
        status: out.status.code().unwrap_or(-1),
        stdout: String::from_utf8_lossy(&out.stdout).into_owned(),
        stderr: String::from_utf8_lossy(&out.stderr).into_owned(),
    })
}
/// Paths with unstaged modifications per `git diff --name-only`.
/// Any failure degrades to an empty snapshot.
fn list_changed_paths(repo_root: &str) -> Vec<String> {
    let root = std::path::Path::new(repo_root);
    let Ok(out) = run_git_capture(root, &[], &["diff", "--name-only"]) else {
        return Vec::new();
    };
    if out.status != 0 {
        return Vec::new();
    }
    out.stdout
        .lines()
        .map(str::trim)
        .filter(|s| !s.is_empty())
        .map(str::to_string)
        .collect()
}
/// Unique paths with unmerged index entries per `git ls-files -u`
/// (line format: `<mode> <sha> <stage>\t<path>`), sorted. Failures degrade
/// to an empty list.
fn list_conflicts(repo_root: &str) -> Vec<String> {
    let root = std::path::Path::new(repo_root);
    let mut paths = std::collections::BTreeSet::new();
    if let Ok(out) = run_git_capture(root, &[], &["ls-files", "-u"]) {
        if out.status == 0 {
            for line in out.stdout.lines() {
                if let Some((_meta, path)) = line.split_once('\t') {
                    paths.insert(path.trim().to_string());
                }
            }
        }
    }
    paths.into_iter().collect()
}
/// Best-effort extraction of file paths from `git apply` output lines that
/// indicate a file could not be applied. Recognizes
/// `... <path>[:] does not match index` and `patch failed: <path>:<line>`,
/// each with or without git's leading `error:` prefix.
fn parse_skipped_paths(text: &str) -> Vec<String> {
    let mut out = Vec::new();
    for line in text.lines() {
        let l = line.trim();
        // git prefixes these diagnostics with "error:"; strip it once so both
        // patterns below match either form. (Previously the "patch failed:"
        // pattern was tested against the unstripped line and never matched.)
        let l = l.strip_prefix("error:").map(str::trim).unwrap_or(l);
        // "<path>[:] does not match index"
        if let Some(p) = l.strip_suffix("does not match index") {
            let p = p.trim().trim_end_matches(':').trim();
            if !p.is_empty() {
                out.push(p.to_string());
            }
            continue;
        }
        // "patch failed: <path>:<line>"
        if let Some(rest) = l.strip_prefix("patch failed:") {
            let rest = rest.trim();
            if let Some((p, _)) = rest.split_once(':') {
                let p = p.trim();
                if !p.is_empty() {
                    out.push(p.to_string());
                }
            }
        }
    }
    out
}
/// Return at most the last 2000 bytes of `s` for log tails, backing the cut
/// up to a UTF-8 char boundary so slicing never panics on multi-byte content.
fn tail(s: &str) -> String {
    const MAX: usize = 2000;
    if s.len() <= MAX {
        s.to_string()
    } else {
        // `&s[start..]` panics if `start` splits a multi-byte character;
        // advance to the next boundary (drops at most 3 bytes of the window).
        let mut start = s.len() - MAX;
        while !s.is_char_boundary(start) {
            start += 1;
        }
        s[start..].to_string()
    }
}
/// Paths present in `after` but not in `before`, deduplicated and in sorted
/// order.
fn set_delta(before: &[String], after: &[String]) -> Vec<String> {
    let seen: std::collections::BTreeSet<&String> = before.iter().collect();
    after
        .iter()
        .filter(|p| !seen.contains(p))
        .cloned()
        .collect::<std::collections::BTreeSet<String>>()
        .into_iter()
        .collect()
}
/// Synthesize a unified git diff for a single file from a bare hunk body.
pub fn synthesize_unified_single_file(hunk_body: &str, old_path: &str, new_path: &str) -> String {
    let mut out = String::with_capacity(hunk_body.len() + old_path.len() + new_path.len() + 48);
    out.push_str(&format!("diff --git a/{old_path} b/{new_path}\n"));
    out.push_str(&format!("--- a/{old_path}\n"));
    out.push_str(&format!("+++ b/{new_path}\n"));
    out.push_str(hunk_body);
    // Guarantee the body is newline-terminated.
    if !hunk_body.ends_with('\n') {
        out.push('\n');
    }
    out
}
/// Split a bare hunk body into per-file segments using a conservative delimiter.
/// We look for lines that equal "*** End of File" (as emitted by our apply-patch format)
/// and use that to separate bodies for multiple files. Each returned chunk is
/// newline-terminated; delimiter lines themselves are dropped.
pub fn split_hunk_body_into_files(body: &str) -> Vec<String> {
    let mut chunks: Vec<String> = Vec::new();
    let mut cur = String::new();
    for line in body.lines() {
        if line.trim() == "*** End of File" {
            if !cur.is_empty() {
                // `cur` already ends with '\n' from the push below; appending
                // another (as before) injected a stray blank line into every
                // non-final hunk body, which can corrupt the synthesized diff.
                chunks.push(std::mem::take(&mut cur));
            }
        } else {
            cur.push_str(line);
            cur.push('\n');
        }
    }
    if !cur.trim().is_empty() {
        chunks.push(cur);
    }
    chunks
}