Compare commits

...

5 Commits

Author SHA1 Message Date
Eric Traut
37a429f363 Merge branch 'main' into etraut/nested_apply_patch 2026-04-05 22:43:13 -07:00
Eric Traut
3a1db1cb53 Fix read-only apply_patch rejection message 2026-04-05 22:30:14 -07:00
Eric Traut
d9b899309d Fix misleading codex exec help usage (#16881)
Addresses #15535

Problem: `codex exec --help` advertised a second positional `[COMMAND]`
even though `exec` only accepts a prompt or a subcommand.

Solution: Override the `exec` usage string so the help output shows the
two supported invocation forms instead of the phantom positional.
2026-04-05 22:09:19 -07:00
Eric Traut
b5edeb98a0 Fix flaky permissions escalation test on Windows (#16825)
Problem: `rejects_escalated_permissions_when_policy_not_on_request`
retried a real shell command after asserting the escalation rejection,
so Windows CI could fail on command startup timing instead of approval
behavior.

Solution: Keep the rejection assertion, verify no turn permissions were
granted, and assert through exec-policy evaluation that the same command
would be allowed without escalation instead of timing a subprocess.
2026-04-05 10:51:01 -07:00
Eric Traut
152b676597 Fix flaky test relating to metadata remote URL (#16823)
This test was flaking on Windows.

Problem: The Windows CI test for turn metadata compared git remote URLs
byte-for-byte even though equivalent remotes can be formatted
differently across Git code paths.

Solution: Normalize the expected and actual origin URLs in the test by
trimming whitespace, removing a trailing slash, and stripping a trailing
.git suffix before comparing.
2026-04-05 10:50:29 -07:00
5 changed files with 86 additions and 77 deletions

View File

@@ -43,7 +43,6 @@ use crate::state::TaskKind;
use crate::tasks::SessionTask;
use crate::tasks::SessionTaskContext;
use crate::tools::ToolRouter;
use crate::tools::context::FunctionToolOutput;
use crate::tools::context::ToolInvocation;
use crate::tools::context::ToolPayload;
use crate::tools::handlers::ShellHandler;
@@ -120,12 +119,6 @@ use std::time::Duration as StdDuration;
#[path = "codex_tests_guardian.rs"]
mod guardian_tests;
use codex_protocol::models::function_call_output_content_items_to_text;
fn expect_text_tool_output(output: &FunctionToolOutput) -> String {
function_call_output_content_items_to_text(&output.body).unwrap_or_default()
}
struct InstructionsTestCase {
slug: &'static str,
expects_apply_patch_instructions: bool,
@@ -5348,7 +5341,9 @@ async fn sample_rollout(
#[tokio::test]
async fn rejects_escalated_permissions_when_policy_not_on_request() {
use crate::exec::ExecParams;
use crate::exec_policy::ExecApprovalRequest;
use crate::sandboxing::SandboxPermissions;
use crate::tools::sandboxing::ExecApprovalRequirement;
use crate::turn_diff_tracker::TurnDiffTracker;
use codex_protocol::protocol::AskForApproval;
use codex_protocol::protocol::SandboxPolicy;
@@ -5394,23 +5389,6 @@ async fn rejects_escalated_permissions_when_policy_not_on_request() {
arg0: None,
};
let params2 = ExecParams {
sandbox_permissions: SandboxPermissions::UseDefault,
command: params.command.clone(),
cwd: params.cwd.clone(),
expiration: timeout_ms.into(),
capture_policy: ExecCapturePolicy::ShellTool,
env: HashMap::new(),
network: None,
windows_sandbox_level: turn_context.windows_sandbox_level,
windows_sandbox_private_desktop: turn_context
.config
.permissions
.windows_sandbox_private_desktop,
justification: params.justification.clone(),
arg0: None,
};
let turn_diff_tracker = Arc::new(tokio::sync::Mutex::new(TurnDiffTracker::new()));
let tool_name = "shell";
@@ -5448,9 +5426,11 @@ async fn rejects_escalated_permissions_when_policy_not_on_request() {
);
pretty_assertions::assert_eq!(output, expected);
pretty_assertions::assert_eq!(session.granted_turn_permissions().await, None);
// Now retry the same command WITHOUT escalated permissions; should succeed.
// Force DangerFullAccess to avoid platform sandbox dependencies in tests.
// The rejection should not poison the non-escalated path for the same
// command. Force DangerFullAccess so this check stays focused on approval
// policy rather than platform-specific sandbox behavior.
let turn_context_mut = Arc::get_mut(&mut turn_context).expect("unique turn context Arc");
turn_context_mut
.sandbox_policy
@@ -5461,45 +5441,22 @@ async fn rejects_escalated_permissions_when_policy_not_on_request() {
turn_context_mut.network_sandbox_policy =
NetworkSandboxPolicy::from(turn_context_mut.sandbox_policy.get());
let resp2 = handler
.handle(ToolInvocation {
session: Arc::clone(&session),
turn: Arc::clone(&turn_context),
tracker: Arc::clone(&turn_diff_tracker),
call_id: "test-call-2".to_string(),
tool_name: tool_name.to_string(),
tool_namespace: None,
payload: ToolPayload::Function {
arguments: serde_json::json!({
"command": params2.command.clone(),
"workdir": Some(turn_context.cwd.to_string_lossy().to_string()),
"timeout_ms": params2.expiration.timeout_ms(),
"sandbox_permissions": params2.sandbox_permissions,
"justification": params2.justification.clone(),
})
.to_string(),
},
let exec_approval_requirement = session
.services
.exec_policy
.create_exec_approval_requirement_for_command(ExecApprovalRequest {
command: &params.command,
approval_policy: turn_context.approval_policy.value(),
sandbox_policy: turn_context.sandbox_policy.get(),
file_system_sandbox_policy: &turn_context.file_system_sandbox_policy,
sandbox_permissions: SandboxPermissions::UseDefault,
prefix_rule: None,
})
.await;
let output = expect_text_tool_output(&resp2.expect("expected Ok result"));
#[derive(Deserialize, PartialEq, Eq, Debug)]
struct ResponseExecMetadata {
exit_code: i32,
}
#[derive(Deserialize)]
struct ResponseExecOutput {
output: String,
metadata: ResponseExecMetadata,
}
let exec_output: ResponseExecOutput =
serde_json::from_str(&output).expect("valid exec output json");
pretty_assertions::assert_eq!(exec_output.metadata, ResponseExecMetadata { exit_code: 0 });
assert!(exec_output.output.contains("hi"));
assert!(matches!(
exec_approval_requirement,
ExecApprovalRequirement::Skip { .. }
));
}
#[tokio::test]
async fn unified_exec_rejects_escalated_permissions_when_policy_not_on_request() {

View File

@@ -12,6 +12,11 @@ use codex_protocol::protocol::SandboxPolicy;
use codex_sandboxing::SandboxType;
use codex_sandboxing::get_platform_sandbox;
const PATCH_REJECTED_OUTSIDE_PROJECT_REASON: &str =
"writing outside of the project; rejected by user approval settings";
const PATCH_REJECTED_READ_ONLY_REASON: &str =
"writing is blocked by read-only sandbox; rejected by user approval settings";
#[derive(Debug, PartialEq)]
pub enum SafetyCheck {
AutoApprove {
@@ -85,9 +90,7 @@ pub fn assess_patch_safety(
None => {
if rejects_sandbox_approval {
SafetyCheck::Reject {
reason:
"writing outside of the project; rejected by user approval settings"
.to_string(),
reason: patch_rejection_reason(sandbox_policy).to_string(),
}
} else {
SafetyCheck::AskUser
@@ -97,14 +100,22 @@ pub fn assess_patch_safety(
}
} else if rejects_sandbox_approval {
SafetyCheck::Reject {
reason: "writing outside of the project; rejected by user approval settings"
.to_string(),
reason: patch_rejection_reason(sandbox_policy).to_string(),
}
} else {
SafetyCheck::AskUser
}
}
fn patch_rejection_reason(sandbox_policy: &SandboxPolicy) -> &'static str {
match sandbox_policy {
SandboxPolicy::ReadOnly { .. } => PATCH_REJECTED_READ_ONLY_REASON,
SandboxPolicy::WorkspaceWrite { .. }
| SandboxPolicy::DangerFullAccess
| SandboxPolicy::ExternalSandbox { .. } => PATCH_REJECTED_OUTSIDE_PROJECT_REASON,
}
}
fn is_write_patch_constrained_to_writable_paths(
action: &ApplyPatchAction,
file_system_sandbox_policy: &FileSystemSandboxPolicy,

View File

@@ -162,8 +162,36 @@ fn granular_sandbox_approval_false_rejects_out_of_root_patch() {
WindowsSandboxLevel::Disabled,
),
SafetyCheck::Reject {
reason: "writing outside of the project; rejected by user approval settings"
.to_string(),
reason: PATCH_REJECTED_OUTSIDE_PROJECT_REASON.to_string(),
},
);
}
#[test]
fn read_only_policy_rejects_patch_with_read_only_reason() {
let tmp = TempDir::new().unwrap();
let cwd = tmp.path().to_path_buf();
let action = ApplyPatchAction::new_add_for_test(&cwd.join("inside.txt"), "".to_string());
let sandbox_policy = SandboxPolicy::new_read_only_policy();
let file_system_sandbox_policy =
FileSystemSandboxPolicy::from_legacy_sandbox_policy(&sandbox_policy, &cwd);
assert!(!is_write_patch_constrained_to_writable_paths(
&action,
&file_system_sandbox_policy,
&cwd,
));
assert_eq!(
assess_patch_safety(
&action,
AskForApproval::Never,
&sandbox_policy,
&file_system_sandbox_policy,
&cwd,
WindowsSandboxLevel::Disabled,
),
SafetyCheck::Reject {
reason: PATCH_REJECTED_READ_ONLY_REASON.to_string(),
},
);
}

View File

@@ -23,6 +23,14 @@ use pretty_assertions::assert_eq;
use tempfile::TempDir;
use wiremock::matchers::header;
fn normalize_git_remote_url(url: &str) -> String {
let normalized = url.trim().trim_end_matches('/');
normalized
.strip_suffix(".git")
.unwrap_or(normalized)
.to_string()
}
#[tokio::test]
async fn responses_stream_includes_subagent_header_on_review() {
core_test_support::skip_if_no_network!();
@@ -540,13 +548,15 @@ async fn responses_stream_includes_turn_metadata_header_for_git_workspace_e2e()
.and_then(serde_json::Value::as_str),
Some(expected_head.as_str())
);
let actual_origin = workspace
.get("associated_remote_urls")
.and_then(serde_json::Value::as_object)
.and_then(|remotes| remotes.get("origin"))
.and_then(serde_json::Value::as_str)
.expect("origin remote should be present");
assert_eq!(
workspace
.get("associated_remote_urls")
.and_then(serde_json::Value::as_object)
.and_then(|remotes| remotes.get("origin"))
.and_then(serde_json::Value::as_str),
Some(expected_origin.as_str())
normalize_git_remote_url(actual_origin),
normalize_git_remote_url(&expected_origin)
);
assert_eq!(
workspace

View File

@@ -6,7 +6,10 @@ use codex_utils_cli::CliConfigOverrides;
use std::path::PathBuf;
#[derive(Parser, Debug)]
#[command(version)]
#[command(
version,
override_usage = "codex exec [OPTIONS] [PROMPT]\n codex exec [OPTIONS] <COMMAND> [ARGS]"
)]
pub struct Cli {
/// Action to perform. If omitted, runs a new non-interactive session.
#[command(subcommand)]