mirror of
https://github.com/openai/codex.git
synced 2026-04-28 18:32:04 +03:00
feat: use shell policy in shell snapshot (#11759)
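Honor `shell_environment_policy.set` even after a shell snapshot has been applied: variables set by the policy (for example `PATH`) must not be replaced by the values the snapshot exports.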
This commit is contained in:
@@ -20,6 +20,7 @@ use core_test_support::wait_for_event;
|
||||
use core_test_support::wait_for_event_match;
|
||||
use pretty_assertions::assert_eq;
|
||||
use serde_json::json;
|
||||
use std::collections::HashMap;
|
||||
use std::path::Path;
|
||||
use std::path::PathBuf;
|
||||
use tokio::fs;
|
||||
@@ -36,6 +37,17 @@ struct SnapshotRun {
|
||||
codex_home: PathBuf,
|
||||
}
|
||||
|
||||
/// `PATH` value that the tests configure through `shell_environment_policy.set`.
const POLICY_PATH_FOR_TEST: &str = "/codex/policy/path";
|
||||
/// `PATH` value exported by the snapshot file the tests write over the real snapshot.
const SNAPSHOT_PATH_FOR_TEST: &str = "/codex/snapshot/path";
|
||||
/// Name of the marker variable exported by the overwritten snapshot file.
const SNAPSHOT_MARKER_VAR: &str = "CODEX_SNAPSHOT_POLICY_MARKER";
|
||||
/// Value the overwritten snapshot file assigns to `SNAPSHOT_MARKER_VAR`.
const SNAPSHOT_MARKER_VALUE: &str = "from_snapshot";
|
||||
/// Text printed by the assertion command when its marker and `PATH` checks all pass.
const POLICY_SUCCESS_OUTPUT: &str = "policy-after-snapshot";
|
||||
|
||||
/// Optional knobs for the snapshot run helpers; `Default` yields an empty `set` map.
#[derive(Debug, Default)]
|
||||
struct SnapshotRunOptions {
|
||||
// Copied into `config.permissions.shell_environment_policy.r#set` by the helpers.
shell_environment_set: HashMap<String, String>,
|
||||
}
|
||||
|
||||
async fn wait_for_snapshot(codex_home: &Path) -> Result<PathBuf> {
|
||||
let snapshot_dir = codex_home.join("shell_snapshots");
|
||||
let deadline = Instant::now() + Duration::from_secs(5);
|
||||
@@ -54,12 +66,57 @@ async fn wait_for_snapshot(codex_home: &Path) -> Result<PathBuf> {
|
||||
}
|
||||
}
|
||||
|
||||
async fn wait_for_file_contents(path: &Path) -> Result<String> {
|
||||
let deadline = Instant::now() + Duration::from_secs(5);
|
||||
loop {
|
||||
match fs::read_to_string(path).await {
|
||||
Ok(contents) => return Ok(contents),
|
||||
Err(err) if err.kind() == std::io::ErrorKind::NotFound => {}
|
||||
Err(err) => return Err(err.into()),
|
||||
}
|
||||
|
||||
if Instant::now() >= deadline {
|
||||
anyhow::bail!("timed out waiting for file {}", path.display());
|
||||
}
|
||||
|
||||
sleep(Duration::from_millis(25)).await;
|
||||
}
|
||||
}
|
||||
|
||||
fn policy_set_path_for_test() -> HashMap<String, String> {
|
||||
HashMap::from([("PATH".to_string(), POLICY_PATH_FOR_TEST.to_string())])
|
||||
}
|
||||
|
||||
fn snapshot_override_content_for_policy_test() -> String {
|
||||
format!(
|
||||
"# Snapshot file\nexport PATH='{SNAPSHOT_PATH_FOR_TEST}'\nexport {SNAPSHOT_MARKER_VAR}='{SNAPSHOT_MARKER_VALUE}'\n"
|
||||
)
|
||||
}
|
||||
|
||||
fn command_asserting_policy_after_snapshot() -> String {
|
||||
format!(
|
||||
"if [ \"${{{SNAPSHOT_MARKER_VAR}:-}}\" = \"{SNAPSHOT_MARKER_VALUE}\" ] && [ \"$PATH\" != \"{SNAPSHOT_PATH_FOR_TEST}\" ]; then case \":$PATH:\" in *\":{POLICY_PATH_FOR_TEST}:\"*) printf \"{POLICY_SUCCESS_OUTPUT}\" ;; *) printf \"path=%s marker=%s\" \"$PATH\" \"${{{SNAPSHOT_MARKER_VAR}:-missing}}\" ;; esac; else printf \"path=%s marker=%s\" \"$PATH\" \"${{{SNAPSHOT_MARKER_VAR}:-missing}}\"; fi"
|
||||
)
|
||||
}
|
||||
|
||||
#[allow(clippy::expect_used)]
|
||||
async fn run_snapshot_command(command: &str) -> Result<SnapshotRun> {
|
||||
let builder = test_codex().with_config(|config| {
|
||||
run_snapshot_command_with_options(command, SnapshotRunOptions::default()).await
|
||||
}
|
||||
|
||||
#[allow(clippy::expect_used)]
|
||||
async fn run_snapshot_command_with_options(
|
||||
command: &str,
|
||||
options: SnapshotRunOptions,
|
||||
) -> Result<SnapshotRun> {
|
||||
let SnapshotRunOptions {
|
||||
shell_environment_set,
|
||||
} = options;
|
||||
let builder = test_codex().with_config(move |config| {
|
||||
config.use_experimental_unified_exec_tool = true;
|
||||
config.features.enable(Feature::UnifiedExec);
|
||||
config.features.enable(Feature::ShellSnapshot);
|
||||
config.permissions.shell_environment_policy.r#set = shell_environment_set;
|
||||
});
|
||||
let harness = TestCodexHarness::with_builder(builder).await?;
|
||||
let args = json!({
|
||||
@@ -132,8 +189,20 @@ async fn run_snapshot_command(command: &str) -> Result<SnapshotRun> {
|
||||
|
||||
#[allow(clippy::expect_used)]
|
||||
async fn run_shell_command_snapshot(command: &str) -> Result<SnapshotRun> {
|
||||
let builder = test_codex().with_config(|config| {
|
||||
run_shell_command_snapshot_with_options(command, SnapshotRunOptions::default()).await
|
||||
}
|
||||
|
||||
#[allow(clippy::expect_used)]
|
||||
async fn run_shell_command_snapshot_with_options(
|
||||
command: &str,
|
||||
options: SnapshotRunOptions,
|
||||
) -> Result<SnapshotRun> {
|
||||
let SnapshotRunOptions {
|
||||
shell_environment_set,
|
||||
} = options;
|
||||
let builder = test_codex().with_config(move |config| {
|
||||
config.features.enable(Feature::ShellSnapshot);
|
||||
config.permissions.shell_environment_policy.r#set = shell_environment_set;
|
||||
});
|
||||
let harness = TestCodexHarness::with_builder(builder).await?;
|
||||
let args = json!({
|
||||
@@ -204,6 +273,64 @@ async fn run_shell_command_snapshot(command: &str) -> Result<SnapshotRun> {
|
||||
})
|
||||
}
|
||||
|
||||
#[allow(clippy::expect_used)]
|
||||
async fn run_tool_turn_on_harness(
|
||||
harness: &TestCodexHarness,
|
||||
prompt: &str,
|
||||
call_id: &str,
|
||||
tool_name: &str,
|
||||
args: serde_json::Value,
|
||||
) -> Result<ExecCommandEndEvent> {
|
||||
let responses = vec![
|
||||
sse(vec![
|
||||
ev_response_created("resp-1"),
|
||||
ev_function_call(call_id, tool_name, &serde_json::to_string(&args)?),
|
||||
ev_completed("resp-1"),
|
||||
]),
|
||||
sse(vec![
|
||||
ev_response_created("resp-2"),
|
||||
ev_assistant_message("msg-1", "done"),
|
||||
ev_completed("resp-2"),
|
||||
]),
|
||||
];
|
||||
mount_sse_sequence(harness.server(), responses).await;
|
||||
|
||||
let test = harness.test();
|
||||
let codex = test.codex.clone();
|
||||
let session_model = test.session_configured.model.clone();
|
||||
let cwd = test.cwd_path().to_path_buf();
|
||||
codex
|
||||
.submit(Op::UserTurn {
|
||||
items: vec![UserInput::Text {
|
||||
text: prompt.into(),
|
||||
text_elements: Vec::new(),
|
||||
}],
|
||||
final_output_json_schema: None,
|
||||
cwd,
|
||||
approval_policy: AskForApproval::Never,
|
||||
sandbox_policy: SandboxPolicy::DangerFullAccess,
|
||||
model: session_model,
|
||||
effort: None,
|
||||
summary: ReasoningSummary::Auto,
|
||||
collaboration_mode: None,
|
||||
personality: None,
|
||||
})
|
||||
.await?;
|
||||
|
||||
wait_for_event_match(&codex, |ev| match ev {
|
||||
EventMsg::ExecCommandBegin(ev) if ev.call_id == call_id => Some(ev.clone()),
|
||||
_ => None,
|
||||
})
|
||||
.await;
|
||||
let end = wait_for_event_match(&codex, |ev| match ev {
|
||||
EventMsg::ExecCommandEnd(ev) if ev.call_id == call_id => Some(ev.clone()),
|
||||
_ => None,
|
||||
})
|
||||
.await;
|
||||
wait_for_event(&codex, |ev| matches!(ev, EventMsg::TurnComplete(_))).await;
|
||||
Ok(end)
|
||||
}
|
||||
|
||||
/// Returns `text` with every `\r\n` sequence replaced by a single `\n`.
///
/// Lone `\r` characters are left untouched.
fn normalize_newlines(text: &str) -> String {
    let segments: Vec<&str> = text.split("\r\n").collect();
    segments.join("\n")
}
|
||||
@@ -260,6 +387,100 @@ async fn linux_shell_command_uses_shell_snapshot() -> Result<()> {
|
||||
Ok(())
|
||||
}
|
||||
|
||||
#[cfg_attr(target_os = "windows", ignore)]
|
||||
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
|
||||
async fn shell_command_snapshot_preserves_shell_environment_policy_set() -> Result<()> {
|
||||
let builder = test_codex().with_config(|config| {
|
||||
config.features.enable(Feature::ShellSnapshot);
|
||||
config.permissions.shell_environment_policy.r#set = policy_set_path_for_test();
|
||||
});
|
||||
let harness = TestCodexHarness::with_builder(builder).await?;
|
||||
let codex_home = harness.test().home.path().to_path_buf();
|
||||
run_tool_turn_on_harness(
|
||||
&harness,
|
||||
"warm up shell snapshot",
|
||||
"shell-snapshot-policy-warmup",
|
||||
"shell_command",
|
||||
json!({
|
||||
"command": "printf warmup",
|
||||
"timeout_ms": 1_000,
|
||||
}),
|
||||
)
|
||||
.await?;
|
||||
let snapshot_path = wait_for_snapshot(&codex_home).await?;
|
||||
fs::write(&snapshot_path, snapshot_override_content_for_policy_test()).await?;
|
||||
|
||||
let command = command_asserting_policy_after_snapshot();
|
||||
let end = run_tool_turn_on_harness(
|
||||
&harness,
|
||||
"verify shell policy after snapshot",
|
||||
"shell-snapshot-policy-assert",
|
||||
"shell_command",
|
||||
json!({
|
||||
"command": command,
|
||||
"timeout_ms": 1_000,
|
||||
}),
|
||||
)
|
||||
.await?;
|
||||
|
||||
assert_eq!(
|
||||
normalize_newlines(&end.stdout).trim(),
|
||||
POLICY_SUCCESS_OUTPUT
|
||||
);
|
||||
assert_eq!(end.exit_code, 0);
|
||||
assert!(snapshot_path.starts_with(codex_home));
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
#[cfg_attr(not(target_os = "linux"), ignore)]
|
||||
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
|
||||
async fn linux_unified_exec_snapshot_preserves_shell_environment_policy_set() -> Result<()> {
|
||||
let builder = test_codex().with_config(|config| {
|
||||
config.use_experimental_unified_exec_tool = true;
|
||||
config.features.enable(Feature::UnifiedExec);
|
||||
config.features.enable(Feature::ShellSnapshot);
|
||||
config.permissions.shell_environment_policy.r#set = policy_set_path_for_test();
|
||||
});
|
||||
let harness = TestCodexHarness::with_builder(builder).await?;
|
||||
let codex_home = harness.test().home.path().to_path_buf();
|
||||
run_tool_turn_on_harness(
|
||||
&harness,
|
||||
"warm up unified exec shell snapshot",
|
||||
"shell-snapshot-policy-warmup-exec",
|
||||
"exec_command",
|
||||
json!({
|
||||
"cmd": "printf warmup",
|
||||
"yield_time_ms": 1_000,
|
||||
}),
|
||||
)
|
||||
.await?;
|
||||
let snapshot_path = wait_for_snapshot(&codex_home).await?;
|
||||
fs::write(&snapshot_path, snapshot_override_content_for_policy_test()).await?;
|
||||
|
||||
let command = command_asserting_policy_after_snapshot();
|
||||
let end = run_tool_turn_on_harness(
|
||||
&harness,
|
||||
"verify unified exec policy after snapshot",
|
||||
"shell-snapshot-policy-assert-exec",
|
||||
"exec_command",
|
||||
json!({
|
||||
"cmd": command,
|
||||
"yield_time_ms": 1_000,
|
||||
}),
|
||||
)
|
||||
.await?;
|
||||
|
||||
assert_eq!(
|
||||
normalize_newlines(&end.stdout).trim(),
|
||||
POLICY_SUCCESS_OUTPUT
|
||||
);
|
||||
assert_eq!(end.exit_code, 0);
|
||||
assert!(snapshot_path.starts_with(codex_home));
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
#[cfg_attr(target_os = "windows", ignore)]
|
||||
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
|
||||
async fn shell_command_snapshot_still_intercepts_apply_patch() -> Result<()> {
|
||||
@@ -316,7 +537,10 @@ async fn shell_command_snapshot_still_intercepts_apply_patch() -> Result<()> {
|
||||
|
||||
wait_for_event(&codex, |ev| matches!(ev, EventMsg::TurnComplete(_))).await;
|
||||
|
||||
assert_eq!(fs::read_to_string(&target).await?, "hello from snapshot\n");
|
||||
assert_eq!(
|
||||
wait_for_file_contents(&target).await?,
|
||||
"hello from snapshot\n"
|
||||
);
|
||||
|
||||
let snapshot_path = wait_for_snapshot(&codex_home).await?;
|
||||
let snapshot_content = fs::read_to_string(&snapshot_path).await?;
|
||||
|
||||
Reference in New Issue
Block a user