Compare commits

...

7 Commits

Author SHA1 Message Date
Eric Traut
5910c104a4 Release 0.66.0-alpha.5 2025-12-04 16:51:20 -08:00
jif-oai
6736d1828d fix: sse for chat (#7594) 2025-12-04 16:46:56 -08:00
Dylan Hurd
073a8533b8 chore(apply-patch) scenarios for e2e testing (#7567)
## Summary
This PR introduces an End to End test suite for apply-patch, so we can
easily validate behavior against other implementations as well.

## Testing
- [x] These are tests
2025-12-05 00:20:54 +00:00
Michael Bolin
0972cd9404 chore: refactor to move Arc<RwLock> concern outside exec_policy_for (#7615)
The caller should decide whether wrapping the policy in `Arc<RwLock>` is
necessary. This should make https://github.com/openai/codex/pull/7609 a
bit smoother.

- `exec_policy_for()` -> `load_exec_policy_for_features()`
- introduce `load_exec_policy()` that does not take `Features` as an arg
- both return `Result<Policy, ExecPolicyError>` instead of
`Result<Arc<RwLock<Policy>>, ExecPolicyError>`

This simplifies the tests as they have no need for `Arc<RwLock>`.
2025-12-04 15:13:27 -08:00
Robby He
28dcdb566a Fix handle_shortcut_overlay_key for cross-platform consistency (#7583)
**Summary**
- Shortcut toggle using `?` in `handle_shortcut_overlay_key` fails to
trigger on some platforms (notably Windows). Current match requires
`KeyCode::Char('?')` with `KeyModifiers::NONE`. Some terminals set
`SHIFT` when producing `?` (since it is typically `Shift + /`), so the
strict `NONE` check prevents toggling.

**Impact**
- On Windows consoles/terminals, pressing `?` with an empty composer
often does nothing, leading to inconsistent UX compared to macOS/Linux.

**Root Cause**
- Crossterm/terminal backends report modifiers inconsistently across
platforms. Generating `?` may include `SHIFT`. The code enforces
`modifiers == NONE`, so valid `?` presses with `SHIFT` are ignored.
AltGr keyboards may also surface as `ALT`.

**Repro Steps**
- Open the TUI, ensure the composer is empty.
- Press `?`.
- Expected: Shortcut overlay toggles.
- Actual (Windows frequently): No toggle occurs.

**Fix Options**
- Option 1 (preferred): Accept `?` regardless of `SHIFT`, but reject
`CONTROL` and `ALT`.
- Rationale: Keeps behavior consistent across platforms with minimal
code change.
	- Example change:
		- Before: matching `KeyModifiers::NONE` only.
		- After: allow `SHIFT`, disallow `CONTROL | ALT`.
		- Suggested condition:
			```rust
			let toggles = matches!(key_event.code, KeyCode::Char('?'))
&& !key_event.modifiers.intersects(KeyModifiers::CONTROL |
KeyModifiers::ALT)
					&& self.is_empty();
			```

- Option 2: Platform-specific handling (Windows vs non-Windows).
- Implement two variants or conditional branches using `#[cfg(target_os
= "windows")]`.
- On Windows, accept `?` with `SHIFT`; on other platforms, retain
current behavior.
- Trade-off: Higher maintenance burden and code divergence for limited
benefit.

---

close #5495
2025-12-04 14:56:58 -08:00
Owen Lin
e8f6d65899 fix(app-server): add will_retry to ErrorNotification (#7611)
VSCE renders `codex/event/stream_error` (automatically retried, e.g.
`"Reconnecting... 1/n"`) and `codex/event/error` (terminal errors)
differently, so add `will_retry` on ErrorNotification to indicate this.
2025-12-04 21:48:37 +00:00
Owen Lin
342c084cc3 fix(app-server): add duration_ms to McpToolCallItem (#7605)
Seems like a nice field to have, and also VSCE does render this one.
2025-12-04 13:45:07 -08:00
58 changed files with 358 additions and 31 deletions

View File

@@ -47,7 +47,7 @@ members = [
resolver = "2"
[workspace.package]
version = "0.0.0"
version = "0.66.0-alpha.5"
# Track the edition for all workspace crates in one place. Individual
# crates can still override this value, but keeping it here means new
# crates created with `cargo new -w ...` automatically inherit the 2024

View File

@@ -942,6 +942,9 @@ pub struct TurnError {
#[ts(export_to = "v2/")]
pub struct ErrorNotification {
pub error: TurnError,
// Set to true if the error is transient and the app-server process will automatically retry.
// If true, this will not interrupt a turn.
pub will_retry: bool,
pub thread_id: String,
pub turn_id: String,
}
@@ -1141,6 +1144,9 @@ pub enum ThreadItem {
arguments: JsonValue,
result: Option<McpToolCallResult>,
error: Option<McpToolCallError>,
/// The duration of the MCP tool call in milliseconds.
#[ts(type = "number | null")]
duration_ms: Option<i64>,
},
#[serde(rename_all = "camelCase")]
#[ts(rename_all = "camelCase")]

View File

@@ -333,6 +333,7 @@ pub(crate) async fn apply_bespoke_event_handling(
outgoing
.send_server_notification(ServerNotification::Error(ErrorNotification {
error: turn_error,
will_retry: false,
thread_id: conversation_id.to_string(),
turn_id: event_turn_id.clone(),
}))
@@ -348,6 +349,7 @@ pub(crate) async fn apply_bespoke_event_handling(
outgoing
.send_server_notification(ServerNotification::Error(ErrorNotification {
error: turn_error,
will_retry: true,
thread_id: conversation_id.to_string(),
turn_id: event_turn_id.clone(),
}))
@@ -1178,6 +1180,7 @@ async fn construct_mcp_tool_call_notification(
arguments: begin_event.invocation.arguments.unwrap_or(JsonValue::Null),
result: None,
error: None,
duration_ms: None,
};
ItemStartedNotification {
thread_id,
@@ -1197,6 +1200,7 @@ async fn construct_mcp_tool_call_end_notification(
} else {
McpToolCallStatus::Failed
};
let duration_ms = i64::try_from(end_event.duration.as_millis()).ok();
let (result, error) = match &end_event.result {
Ok(value) => (
@@ -1222,6 +1226,7 @@ async fn construct_mcp_tool_call_end_notification(
arguments: end_event.invocation.arguments.unwrap_or(JsonValue::Null),
result,
error,
duration_ms,
};
ItemCompletedNotification {
thread_id,
@@ -1598,6 +1603,7 @@ mod tests {
arguments: serde_json::json!({"server": ""}),
result: None,
error: None,
duration_ms: None,
},
};
@@ -1751,6 +1757,7 @@ mod tests {
arguments: JsonValue::Null,
result: None,
error: None,
duration_ms: None,
},
};
@@ -1804,6 +1811,7 @@ mod tests {
structured_content: None,
}),
error: None,
duration_ms: Some(0),
},
};
@@ -1845,6 +1853,7 @@ mod tests {
error: Some(McpToolCallError {
message: "boom".to_string(),
}),
duration_ms: Some(1),
},
};

View File

@@ -0,0 +1 @@
** text eol=lf

View File

@@ -0,0 +1 @@
This is a new file

View File

@@ -0,0 +1,4 @@
*** Begin Patch
*** Add File: bar.md
+This is a new file
*** End Patch

View File

@@ -0,0 +1,2 @@
line1
changed

View File

@@ -0,0 +1 @@
obsolete

View File

@@ -0,0 +1,2 @@
line1
line2

View File

@@ -0,0 +1,9 @@
*** Begin Patch
*** Add File: nested/new.txt
+created
*** Delete File: delete.txt
*** Update File: modify.txt
@@
-line2
+changed
*** End Patch

View File

@@ -0,0 +1,4 @@
line1
changed2
line3
changed4

View File

@@ -0,0 +1,4 @@
line1
line2
line3
line4

View File

@@ -0,0 +1,9 @@
*** Begin Patch
*** Update File: multi.txt
@@
-line2
+changed2
@@
-line4
+changed4
*** End Patch

View File

@@ -0,0 +1 @@
unrelated file

View File

@@ -0,0 +1 @@
old content

View File

@@ -0,0 +1 @@
unrelated file

View File

@@ -0,0 +1,7 @@
*** Begin Patch
*** Update File: old/name.txt
*** Move to: renamed/dir/name.txt
@@
-old content
+new content
*** End Patch

View File

@@ -0,0 +1,2 @@
*** Begin Patch
*** End Patch

View File

@@ -0,0 +1,2 @@
line1
line2

View File

@@ -0,0 +1,2 @@
line1
line2

View File

@@ -0,0 +1,6 @@
*** Begin Patch
*** Update File: modify.txt
@@
-missing
+changed
*** End Patch

View File

@@ -0,0 +1,3 @@
*** Begin Patch
*** Delete File: missing.txt
*** End Patch

View File

@@ -0,0 +1,3 @@
*** Begin Patch
*** Update File: foo.txt
*** End Patch

View File

@@ -0,0 +1,6 @@
*** Begin Patch
*** Update File: missing.txt
@@
-old
+new
*** End Patch

View File

@@ -0,0 +1,7 @@
*** Begin Patch
*** Update File: old/name.txt
*** Move to: renamed/dir/name.txt
@@
-from
+new
*** End Patch

View File

@@ -0,0 +1,4 @@
*** Begin Patch
*** Add File: duplicate.txt
+new content
*** End Patch

View File

@@ -0,0 +1,3 @@
*** Begin Patch
*** Delete File: dir
*** End Patch

View File

@@ -0,0 +1,3 @@
*** Begin Patch
*** Frobnicate File: foo
*** End Patch

View File

@@ -0,0 +1,2 @@
first line
second line

View File

@@ -0,0 +1,7 @@
*** Begin Patch
*** Update File: no_newline.txt
@@
-no newline at end
+first line
+second line
*** End Patch

View File

@@ -0,0 +1,8 @@
*** Begin Patch
*** Add File: created.txt
+hello
*** Update File: missing.txt
@@
-old
+new
*** End Patch

View File

@@ -0,0 +1,4 @@
line1
line2
added line 1
added line 2

View File

@@ -0,0 +1,2 @@
line1
line2

View File

@@ -0,0 +1,6 @@
*** Begin Patch
*** Update File: input.txt
@@
+added line 1
+added line 2
*** End Patch

View File

@@ -0,0 +1,6 @@
*** Begin Patch
*** Update File: foo.txt
@@
-old
+new
*** End Patch

View File

@@ -0,0 +1,6 @@
*** Begin Patch
*** Update File: file.txt
@@
-one
+two
*** End Patch

View File

@@ -0,0 +1,18 @@
# Overview
This directory is a collection of end-to-end tests for the apply-patch specification, meant to be easily portable to other languages or platforms.
# Specification
Each test case is one directory, composed of input state (input/), the patch operation (patch.txt), and the expected final state (expected/). This structure is designed to keep tests simple (i.e. test exactly one patch at a time) while still providing enough flexibility to test any given operation across files.
Here's what this would look like for a simple apply-patch test case that creates a new file:
```
001_add/
  input/
    foo.md
  expected/
    foo.md
    bar.md
  patch.txt
```

View File

@@ -1,3 +1,4 @@
mod cli;
mod scenarios;
#[cfg(not(target_os = "windows"))]
mod tool;

View File

@@ -0,0 +1,114 @@
use assert_cmd::prelude::*;
use pretty_assertions::assert_eq;
use std::collections::BTreeMap;
use std::fs;
use std::path::Path;
use std::path::PathBuf;
use std::process::Command;
use tempfile::tempdir;
/// Drives every scenario directory found under `tests/fixtures/scenarios`
/// through [`run_apply_patch_scenario`].
///
/// The first scenario that returns an error (or fails its assertion) aborts
/// the test; entries that are not directories are ignored.
#[test]
fn test_apply_patch_scenarios() -> anyhow::Result<()> {
    let scenarios_root = Path::new("tests/fixtures/scenarios");
    for dir_entry in fs::read_dir(scenarios_root)? {
        let scenario_dir = dir_entry?.path();
        // Only directories are scenarios; stray files alongside them are skipped.
        if !scenario_dir.is_dir() {
            continue;
        }
        run_apply_patch_scenario(&scenario_dir)?;
    }
    Ok(())
}
/// Executes a single scenario directory end to end.
///
/// The scenario's `input/` tree (when present) is copied into a fresh
/// temporary directory, the `apply_patch` binary is run there with the
/// contents of `patch.txt` as its only argument, and the resulting tree is
/// compared against the scenario's `expected/` tree.
///
/// # Errors
/// Returns an error if the temporary directory cannot be created, the input
/// cannot be copied, `patch.txt` cannot be read, the binary cannot be located
/// or spawned, or either tree cannot be snapshotted.
///
/// # Panics
/// Panics (via `assert_eq!`) when the final tree differs from `expected/`.
fn run_apply_patch_scenario(dir: &Path) -> anyhow::Result<()> {
    let workspace = tempdir()?;
    let workspace_root = workspace.path();

    // Seed the workspace; a scenario without `input/` starts from an empty directory.
    let seed_dir = dir.join("input");
    if seed_dir.is_dir() {
        copy_dir_recursive(&seed_dir, workspace_root)?;
    }

    let patch_text = fs::read_to_string(dir.join("patch.txt"))?;

    // The exit status is deliberately not inspected: scenarios are specified
    // purely by the final filesystem state, which is compared below.
    let mut apply_patch = Command::cargo_bin("apply_patch")?;
    apply_patch.arg(patch_text).current_dir(workspace_root);
    apply_patch.output()?;

    // The final state must match the expected state exactly.
    let expected_snapshot = snapshot_dir(&dir.join("expected"))?;
    let actual_snapshot = snapshot_dir(workspace_root)?;
    assert_eq!(
        actual_snapshot,
        expected_snapshot,
        "Scenario {} did not match expected final state",
        dir.display()
    );
    Ok(())
}
/// One node recorded in a directory snapshot.
#[derive(Clone, Debug, Eq, PartialEq)]
enum Entry {
    /// A regular file, holding its complete contents as raw bytes.
    File(Vec<u8>),
    /// A directory; its children are recorded as separate snapshot entries.
    Dir,
}
/// Captures the tree rooted at `root` as a map from root-relative path to
/// [`Entry`].
///
/// When `root` is not a directory (including when it does not exist) the
/// returned map is empty rather than an error.
fn snapshot_dir(root: &Path) -> anyhow::Result<BTreeMap<PathBuf, Entry>> {
    let mut snapshot = BTreeMap::new();
    if !root.is_dir() {
        return Ok(snapshot);
    }
    snapshot_dir_recursive(root, root, &mut snapshot)?;
    Ok(snapshot)
}
/// Walks `dir` depth-first and records each directory and regular file into
/// `entries`, keyed by its path relative to `base`.
///
/// Entries whose type is neither directory nor regular file are left out of
/// the snapshot, as are paths that cannot be made relative to `base`.
///
/// # Errors
/// Propagates any I/O error from listing a directory, querying an entry's
/// type, or reading a file's contents.
fn snapshot_dir_recursive(
    base: &Path,
    dir: &Path,
    entries: &mut BTreeMap<PathBuf, Entry>,
) -> anyhow::Result<()> {
    for child in fs::read_dir(dir)? {
        let child = child?;
        let child_path = child.path();
        let Ok(relative) = child_path.strip_prefix(base) else {
            continue;
        };
        let relative = relative.to_path_buf();
        match child.file_type()? {
            kind if kind.is_dir() => {
                // Record the directory itself before descending so empty
                // directories still show up in the snapshot.
                entries.insert(relative, Entry::Dir);
                snapshot_dir_recursive(base, &child_path, entries)?;
            }
            kind if kind.is_file() => {
                entries.insert(relative, Entry::File(fs::read(&child_path)?));
            }
            // Anything else is intentionally not part of the snapshot.
            _ => {}
        }
    }
    Ok(())
}
/// Recursively copies the contents of `src` into `dst`.
///
/// Sub-directories are created as needed (so empty ones are preserved) and
/// regular files are copied with `fs::copy`. Entries that are neither a
/// directory nor a regular file are skipped.
///
/// # Errors
/// Propagates any I/O error from listing `src`, creating directories, or
/// copying files.
fn copy_dir_recursive(src: &Path, dst: &Path) -> anyhow::Result<()> {
    for child in fs::read_dir(src)? {
        let child = child?;
        let source_path = child.path();
        let kind = child.file_type()?;
        let target_path = dst.join(child.file_name());
        if kind.is_file() {
            // Make sure the destination directory exists before copying into it.
            if let Some(parent_dir) = target_path.parent() {
                fs::create_dir_all(parent_dir)?;
            }
            fs::copy(&source_path, &target_path)?;
        } else if kind.is_dir() {
            fs::create_dir_all(&target_path)?;
            copy_dir_recursive(&source_path, &target_path)?;
        }
    }
    Ok(())
}

View File

@@ -161,8 +161,10 @@ pub async fn process_chat_sse<S>(
}
if let Some(func) = tool_call.get("function") {
if let Some(fname) = func.get("name").and_then(|n| n.as_str()) {
call_state.name = Some(fname.to_string());
if let Some(fname) = func.get("name").and_then(|n| n.as_str())
&& !fname.is_empty()
{
call_state.name.get_or_insert_with(|| fname.to_string());
}
if let Some(arguments) = func.get("arguments").and_then(|a| a.as_str())
{
@@ -432,6 +434,47 @@ mod tests {
);
}
#[tokio::test]
async fn preserves_tool_call_name_when_empty_deltas_arrive() {
let delta_with_name = json!({
"choices": [{
"delta": {
"tool_calls": [{
"id": "call_a",
"function": { "name": "do_a" }
}]
}
}]
});
let delta_with_empty_name = json!({
"choices": [{
"delta": {
"tool_calls": [{
"id": "call_a",
"function": { "name": "", "arguments": "{}" }
}]
}
}]
});
let finish = json!({
"choices": [{
"finish_reason": "tool_calls"
}]
});
let body = build_body(&[delta_with_name, delta_with_empty_name, finish]);
let events = collect_events(&body).await;
assert_matches!(
&events[..],
[
ResponseEvent::OutputItemDone(ResponseItem::FunctionCall { name, arguments, .. }),
ResponseEvent::Completed { .. }
] if name == "do_a" && arguments == "{}"
);
}
#[tokio::test]
async fn emits_tool_calls_even_when_content_and_reasoning_present() {
let delta_content_and_tools = json!({

View File

@@ -11,6 +11,7 @@ use crate::compact;
use crate::compact::run_inline_auto_compact_task;
use crate::compact::should_use_remote_compact_task;
use crate::compact_remote::run_inline_remote_auto_compact_task;
use crate::exec_policy::load_exec_policy_for_features;
use crate::features::Feature;
use crate::features::Features;
use crate::openai_models::models_manager::ModelsManager;
@@ -174,9 +175,10 @@ impl Codex {
let user_instructions = get_user_instructions(&config).await;
let exec_policy = crate::exec_policy::exec_policy_for(&config.features, &config.codex_home)
let exec_policy = load_exec_policy_for_features(&config.features, &config.codex_home)
.await
.map_err(|err| CodexErr::Fatal(format!("failed to load execpolicy: {err}")))?;
let exec_policy = Arc::new(RwLock::new(exec_policy));
let config = Arc::new(config);

View File

@@ -73,14 +73,18 @@ pub enum ExecPolicyUpdateError {
FeatureDisabled,
}
pub(crate) async fn exec_policy_for(
pub(crate) async fn load_exec_policy_for_features(
features: &Features,
codex_home: &Path,
) -> Result<Arc<RwLock<Policy>>, ExecPolicyError> {
) -> Result<Policy, ExecPolicyError> {
if !features.enabled(Feature::ExecPolicy) {
return Ok(Arc::new(RwLock::new(Policy::empty())));
Ok(Policy::empty())
} else {
load_exec_policy(codex_home).await
}
}
pub async fn load_exec_policy(codex_home: &Path) -> Result<Policy, ExecPolicyError> {
let policy_dir = codex_home.join(POLICY_DIR_NAME);
let policy_paths = collect_policy_files(&policy_dir).await?;
@@ -102,7 +106,7 @@ pub(crate) async fn exec_policy_for(
})?;
}
let policy = Arc::new(RwLock::new(parser.build()));
let policy = parser.build();
tracing::debug!(
"loaded execpolicy from {} files in {}",
policy_paths.len(),
@@ -306,7 +310,7 @@ mod tests {
features.disable(Feature::ExecPolicy);
let temp_dir = tempdir().expect("create temp dir");
let policy = exec_policy_for(&features, temp_dir.path())
let policy = load_exec_policy_for_features(&features, temp_dir.path())
.await
.expect("policy result");
@@ -319,10 +323,7 @@ mod tests {
decision: Decision::Allow
}],
},
policy
.read()
.await
.check_multiple(commands.iter(), &|_| Decision::Allow)
policy.check_multiple(commands.iter(), &|_| Decision::Allow)
);
assert!(!temp_dir.path().join(POLICY_DIR_NAME).exists());
}
@@ -350,7 +351,7 @@ mod tests {
)
.expect("write policy file");
let policy = exec_policy_for(&Features::with_defaults(), temp_dir.path())
let policy = load_exec_policy(temp_dir.path())
.await
.expect("policy result");
let command = [vec!["rm".to_string()]];
@@ -362,10 +363,7 @@ mod tests {
decision: Decision::Forbidden
}],
},
policy
.read()
.await
.check_multiple(command.iter(), &|_| Decision::Allow)
policy.check_multiple(command.iter(), &|_| Decision::Allow)
);
}
@@ -378,7 +376,7 @@ mod tests {
)
.expect("write policy file");
let policy = exec_policy_for(&Features::with_defaults(), temp_dir.path())
let policy = load_exec_policy(temp_dir.path())
.await
.expect("policy result");
let command = [vec!["ls".to_string()]];
@@ -390,10 +388,7 @@ mod tests {
decision: Decision::Allow
}],
},
policy
.read()
.await
.check_multiple(command.iter(), &|_| Decision::Allow)
policy.check_multiple(command.iter(), &|_| Decision::Allow)
);
}

View File

@@ -1504,14 +1504,9 @@ impl ChatComposer {
return false;
}
let toggles = matches!(
key_event,
KeyEvent {
code: KeyCode::Char('?'),
modifiers: KeyModifiers::NONE,
..
} if self.is_empty()
);
let toggles = matches!(key_event.code, KeyCode::Char('?'))
&& !has_ctrl_or_alt(key_event.modifiers)
&& self.is_empty();
if !toggles {
return false;