core: remove special execve handling for skill scripts (#15812)

This commit is contained in:
Michael Bolin
2026-03-26 07:46:04 -07:00
committed by GitHub
parent 6dcac41d53
commit 01fa4f0212
7 changed files with 35 additions and 1073 deletions

View File

@@ -1870,7 +1870,6 @@ impl Session {
session_telemetry,
models_manager: Arc::clone(&models_manager),
tool_approvals: Mutex::new(ApprovalStore::default()),
execve_session_approvals: RwLock::new(HashMap::new()),
skills_manager,
plugins_manager: Arc::clone(&plugins_manager),
mcp_manager: Arc::clone(&mcp_manager),

View File

@@ -2683,7 +2683,6 @@ pub(crate) async fn make_session_and_context() -> (Session, TurnContext) {
session_telemetry: session_telemetry.clone(),
models_manager: Arc::clone(&models_manager),
tool_approvals: Mutex::new(ApprovalStore::default()),
execve_session_approvals: RwLock::new(HashMap::new()),
skills_manager,
plugins_manager,
mcp_manager,
@@ -3521,7 +3520,6 @@ pub(crate) async fn make_session_and_context_with_dynamic_tools_and_rx(
session_telemetry: session_telemetry.clone(),
models_manager: Arc::clone(&models_manager),
tool_approvals: Mutex::new(ApprovalStore::default()),
execve_session_approvals: RwLock::new(HashMap::new()),
skills_manager,
plugins_manager,
mcp_manager,

View File

@@ -1,4 +1,3 @@
use std::collections::HashMap;
use std::sync::Arc;
use crate::AuthManager;
@@ -16,14 +15,12 @@ use crate::skills_watcher::SkillsWatcher;
use crate::state_db::StateDbHandle;
use crate::tools::code_mode::CodeModeService;
use crate::tools::network_approval::NetworkApprovalService;
use crate::tools::runtimes::ExecveSessionApproval;
use crate::tools::sandboxing::ApprovalStore;
use crate::unified_exec::UnifiedExecProcessManager;
use codex_analytics::AnalyticsEventsClient;
use codex_exec_server::Environment;
use codex_hooks::Hooks;
use codex_otel::SessionTelemetry;
use codex_utils_absolute_path::AbsolutePathBuf;
use std::path::PathBuf;
use tokio::sync::Mutex;
use tokio::sync::RwLock;
@@ -49,8 +46,6 @@ pub(crate) struct SessionServices {
pub(crate) models_manager: Arc<ModelsManager>,
pub(crate) session_telemetry: SessionTelemetry,
pub(crate) tool_approvals: Mutex<ApprovalStore>,
#[cfg_attr(not(unix), allow(dead_code))]
pub(crate) execve_session_approvals: RwLock<HashMap<AbsolutePathBuf, ExecveSessionApproval>>,
pub(crate) skills_manager: Arc<SkillsManager>,
pub(crate) plugins_manager: Arc<PluginsManager>,
pub(crate) mcp_manager: Arc<McpManager>,

View File

@@ -4,7 +4,6 @@ Module: runtimes
Concrete ToolRuntime implementations for specific tools. Each runtime stays
small and focused and reuses the orchestrator for approvals + sandbox + retry.
*/
use crate::SkillMetadata;
use crate::path_utils;
use crate::shell::Shell;
use crate::tools::sandboxing::ToolError;
@@ -17,14 +16,6 @@ pub mod apply_patch;
pub mod shell;
pub mod unified_exec;
/// Session-scoped approval recorded for a program seen by the execve
/// interception path.
///
/// Values of this type are stored in `SessionServices::execve_session_approvals`,
/// keyed by the approved program's `AbsolutePathBuf`.
#[derive(Debug, Clone)]
pub(crate) struct ExecveSessionApproval {
/// If this execve session approval is associated with a skill script, this
/// field contains metadata about the skill.
// The `dead_code` lint is silenced for this field on non-unix targets.
#[cfg_attr(not(unix), allow(dead_code))]
pub skill: Option<SkillMetadata>,
}
/// Shared helper to construct sandbox transform inputs from a tokenized command line.
/// Validates that at least a program is present.
pub(crate) fn build_sandbox_command(

View File

@@ -1,5 +1,4 @@
use super::ShellRequest;
use crate::SkillMetadata;
use crate::error::CodexErr;
use crate::error::SandboxErr;
use crate::exec::ExecCapturePolicy;
@@ -13,8 +12,6 @@ use crate::sandboxing::ExecOptions;
use crate::sandboxing::ExecRequest;
use crate::sandboxing::SandboxPermissions;
use crate::shell::ShellType;
use crate::skills_load_input_from_config;
use crate::tools::runtimes::ExecveSessionApproval;
use crate::tools::runtimes::build_sandbox_command;
use crate::tools::sandboxing::SandboxAttempt;
use crate::tools::sandboxing::ToolCtx;
@@ -31,7 +28,6 @@ use codex_protocol::models::PermissionProfile;
use codex_protocol::permissions::FileSystemSandboxPolicy;
use codex_protocol::permissions::NetworkSandboxPolicy;
use codex_protocol::protocol::AskForApproval;
use codex_protocol::protocol::ExecApprovalRequestSkillMetadata;
use codex_protocol::protocol::NetworkPolicyRuleAction;
use codex_protocol::protocol::ReviewDecision;
use codex_protocol::protocol::SandboxPolicy;
@@ -74,9 +70,6 @@ const REJECT_SANDBOX_APPROVAL_REASON: &str =
"approval required by policy, but AskForApproval::Granular.sandbox_approval is false";
const REJECT_RULES_APPROVAL_REASON: &str =
"approval required by policy rule, but AskForApproval::Granular.rules is false";
const REJECT_SKILL_APPROVAL_REASON: &str =
"approval required by skill, but AskForApproval::Granular.skill_approval is false";
fn approval_sandbox_permissions(
sandbox_permissions: SandboxPermissions,
additional_permissions_preapproved: bool,
@@ -327,9 +320,6 @@ struct CoreShellActionProvider {
#[allow(clippy::large_enum_variant)]
enum DecisionSource {
SkillScript {
skill: SkillMetadata,
},
PrefixRule,
/// Often, this is `is_safe_command()`.
UnmatchedCommandFallback,
@@ -341,11 +331,6 @@ fn execve_prompt_is_rejected_by_policy(
) -> Option<&'static str> {
match (approval_policy, decision_source) {
(AskForApproval::Never, _) => Some(PROMPT_CONFLICT_REASON),
(AskForApproval::Granular(granular_config), DecisionSource::SkillScript { .. })
if !granular_config.allows_skill_approval() =>
{
Some(REJECT_SKILL_APPROVAL_REASON)
}
(AskForApproval::Granular(granular_config), DecisionSource::PrefixRule)
if !granular_config.allows_rules_approval() =>
{
@@ -397,17 +382,6 @@ impl CoreShellActionProvider {
}
}
/// Maps a skill's declared permissions onto the escalation mode used to run
/// its script.
///
/// When the profile (or the default substituted for a missing one) is empty,
/// the turn's default execution is used; otherwise the declared profile is
/// carried along as explicit escalation permissions.
fn skill_escalation_execution(skill: &SkillMetadata) -> EscalationExecution {
    match skill.permission_profile.clone().unwrap_or_default() {
        declared if declared.is_empty() => EscalationExecution::TurnDefault,
        declared => EscalationExecution::Permissions(
            EscalationPermissions::PermissionProfile(declared),
        ),
    }
}
async fn prompt(
&self,
program: &AbsolutePathBuf,
@@ -415,7 +389,6 @@ impl CoreShellActionProvider {
workdir: &AbsolutePathBuf,
stopwatch: &Stopwatch,
additional_permissions: Option<PermissionProfile>,
decision_source: &DecisionSource,
) -> anyhow::Result<ReviewDecision> {
let command = join_program_and_argv(program, argv);
let workdir = workdir.to_path_buf();
@@ -442,28 +415,6 @@ impl CoreShellActionProvider {
)
.await;
}
let available_decisions = vec![
Some(ReviewDecision::Approved),
// Currently, ApprovedForSession is only honored for skills,
// so only offer it for skill script approvals.
if matches!(decision_source, DecisionSource::SkillScript { .. }) {
Some(ReviewDecision::ApprovedForSession)
} else {
None
},
Some(ReviewDecision::Abort),
]
.into_iter()
.flatten()
.collect();
let skill_metadata = match decision_source {
DecisionSource::SkillScript { skill } => {
Some(ExecApprovalRequestSkillMetadata {
path_to_skills_md: skill.path_to_skills_md.clone(),
})
}
DecisionSource::PrefixRule | DecisionSource::UnmatchedCommandFallback => None,
};
session
.request_command_approval(
&turn,
@@ -475,48 +426,14 @@ impl CoreShellActionProvider {
/*network_approval_context*/ None,
/*proposed_execpolicy_amendment*/ None,
additional_permissions,
skill_metadata,
Some(available_decisions),
/*skill_metadata*/ None,
Some(vec![ReviewDecision::Approved, ReviewDecision::Abort]),
)
.await
})
.await)
}
/// Returns the first loaded skill whose `scripts` directory contains `program`.
///
/// `program` should be an absolute path because it is expected to come from
/// an intercepted execve(2) call. A skill matches when `program` starts with
/// `<parent of path_to_skills_md>/scripts`.
async fn find_skill(&self, program: &AbsolutePathBuf) -> Option<SkillMetadata> {
    let config = self.turn.config.as_ref();
    let plugins = self
        .session
        .services
        .plugins_manager
        .plugins_for_config(config);
    let skill_roots = plugins.effective_skill_roots();
    let load_input = skills_load_input_from_config(config, skill_roots);
    let loaded = self
        .session
        .services
        .skills_manager
        .skills_for_cwd(&load_input, /*force_reload*/ false)
        .await;

    let candidate = program.as_path();
    // We intentionally ignore "enabled" status here for now.
    loaded.skills.into_iter().find(|skill| {
        skill
            .path_to_skills_md
            .parent()
            // A metadata path without a parent directory can never match.
            .is_some_and(|skill_dir| candidate.starts_with(skill_dir.join("scripts")))
    })
}
#[allow(clippy::too_many_arguments)]
async fn process_decision(
&self,
@@ -540,17 +457,11 @@ impl CoreShellActionProvider {
EscalationDecision::deny(Some("Execution forbidden by policy".to_string()))
} else {
match self
.prompt(
program,
argv,
workdir,
&self.stopwatch,
prompt_permissions,
&decision_source,
)
.prompt(program, argv, workdir, &self.stopwatch, prompt_permissions)
.await?
{
ReviewDecision::Approved
| ReviewDecision::ApprovedForSession
| ReviewDecision::ApprovedExecpolicyAmendment { .. } => {
if needs_escalation {
EscalationDecision::escalate(escalation_execution.clone())
@@ -558,33 +469,6 @@ impl CoreShellActionProvider {
EscalationDecision::run()
}
}
ReviewDecision::ApprovedForSession => {
// Currently, we only add session approvals for
// skill scripts because we are storing only the
// `program` whereas prefix rules may be restricted by a longer prefix.
if let DecisionSource::SkillScript { skill } = decision_source {
tracing::debug!(
"Adding session approval for {program:?} due to user approval of skill script {skill:?}"
);
self.session
.services
.execve_session_approvals
.write()
.await
.insert(
program.clone(),
ExecveSessionApproval {
skill: Some(skill.clone()),
},
);
}
if needs_escalation {
EscalationDecision::escalate(escalation_execution.clone())
} else {
EscalationDecision::run()
}
}
ReviewDecision::NetworkPolicyAmendment {
network_policy_amendment,
} => match network_policy_amendment.action {
@@ -641,69 +525,6 @@ impl EscalationPolicy for CoreShellActionProvider {
"Determining escalation action for command {program:?} with args {argv:?} in {workdir:?}"
);
// Check to see whether `program` has an existing entry in
// `execve_session_approvals`. If so, we can skip policy checks and user
// prompts and go straight to allowing execution.
let approval = {
self.session
.services
.execve_session_approvals
.read()
.await
.get(program)
.cloned()
};
if let Some(approval) = approval {
tracing::debug!(
"Found session approval for {program:?}, allowing execution without further checks"
);
let execution = approval
.skill
.as_ref()
.map(Self::skill_escalation_execution)
.unwrap_or(EscalationExecution::TurnDefault);
return Ok(EscalationDecision::escalate(execution));
}
// In the usual case, the execve wrapper reports the command being
// executed in `program`, so a direct skill lookup is sufficient.
if let Some(skill) = self.find_skill(program).await {
// For now, scripts that look like they belong to skills bypass
// general exec policy evaluation. Permissionless skills inherit the
// turn sandbox directly; skills with declared permissions still
// prompt here before applying their permission profile.
let prompt_permissions = skill.permission_profile.clone();
if prompt_permissions
.as_ref()
.is_none_or(PermissionProfile::is_empty)
{
tracing::debug!(
"Matched {program:?} to permissionless skill {skill:?}, inheriting turn sandbox"
);
return Ok(EscalationDecision::escalate(
EscalationExecution::TurnDefault,
));
}
tracing::debug!("Matched {program:?} to skill {skill:?}, prompting for approval");
let needs_escalation = true;
let decision_source = DecisionSource::SkillScript {
skill: skill.clone(),
};
return self
.process_decision(
Decision::Prompt,
needs_escalation,
program,
argv,
workdir,
prompt_permissions,
Self::skill_escalation_execution(&skill),
decision_source,
)
.await;
}
let evaluation = {
let policy = self.policy.read().await;
evaluate_intercepted_exec_policy(
@@ -746,7 +567,6 @@ impl EscalationPolicy for CoreShellActionProvider {
.macos_seatbelt_profile_extensions
.as_ref(),
),
DecisionSource::SkillScript { .. } => unreachable!("handled above"),
};
self.process_decision(
evaluation.decision,

View File

@@ -8,7 +8,6 @@ use super::evaluate_intercepted_exec_policy;
use super::extract_shell_script;
use super::join_program_and_argv;
use super::map_exec_result;
use crate::SkillMetadata;
#[cfg(target_os = "macos")]
use crate::config::Constrained;
#[cfg(target_os = "macos")]
@@ -36,7 +35,6 @@ use codex_protocol::permissions::FileSystemSandboxEntry;
use codex_protocol::permissions::FileSystemSandboxPolicy;
use codex_protocol::permissions::FileSystemSpecialPath;
use codex_protocol::permissions::NetworkSandboxPolicy;
use codex_protocol::protocol::SkillScope;
use codex_sandboxing::SandboxType;
#[cfg(target_os = "macos")]
use codex_sandboxing::seatbelt::MACOS_PATH_TO_SEATBELT_EXECUTABLE;
@@ -83,55 +81,6 @@ fn unrestricted_file_system_sandbox_policy() -> FileSystemSandboxPolicy {
FileSystemSandboxPolicy::unrestricted()
}
/// Builds a user-scoped `SkillMetadata` fixture rooted at
/// `/tmp/skill/SKILL.md`; only `permission_profile` varies between callers.
fn test_skill_metadata(permission_profile: Option<PermissionProfile>) -> SkillMetadata {
    let name = String::from("skill");
    let description = String::from("description");
    let path_to_skills_md = PathBuf::from("/tmp/skill/SKILL.md");
    SkillMetadata {
        name,
        description,
        short_description: None,
        interface: None,
        dependencies: None,
        policy: None,
        permission_profile,
        managed_network_override: None,
        path_to_skills_md,
        scope: SkillScope::User,
    }
}
/// For a skill-script decision source, a granular policy rejects the prompt
/// only when `skill_approval` is `false`.
#[test]
fn execve_prompt_rejection_uses_skill_approval_for_skill_scripts() {
    let decision_source = super::DecisionSource::SkillScript {
        skill: test_skill_metadata(None),
    };
    // Every other granular flag stays enabled so that `skill_approval` is the
    // only input that differs between the two cases.
    let rejection_for = |skill_approval: bool| {
        super::execve_prompt_is_rejected_by_policy(
            AskForApproval::Granular(GranularApprovalConfig {
                sandbox_approval: true,
                rules: true,
                skill_approval,
                request_permissions: true,
                mcp_elicitations: true,
            }),
            &decision_source,
        )
    };

    assert_eq!(rejection_for(true), None);
    assert_eq!(
        rejection_for(false),
        Some("approval required by skill, but AskForApproval::Granular.skill_approval is false"),
    );
}
#[test]
fn execve_prompt_rejection_keeps_prefix_rules_on_rules_flag() {
assert_eq!(
@@ -392,42 +341,6 @@ fn shell_request_escalation_execution_is_explicit() {
);
}
/// A skill that declares a non-empty permission profile must escalate with
/// exactly that profile.
#[test]
fn skill_escalation_execution_uses_additional_permissions() {
    let output_dir = AbsolutePathBuf::from_absolute_path("/tmp/output").unwrap();
    let requested_permissions = PermissionProfile {
        file_system: Some(FileSystemPermissions {
            read: None,
            write: Some(vec![output_dir]),
        }),
        ..Default::default()
    };
    let skill = test_skill_metadata(Some(requested_permissions.clone()));
    let expected = EscalationExecution::Permissions(EscalationPermissions::PermissionProfile(
        requested_permissions,
    ));

    assert_eq!(
        CoreShellActionProvider::skill_escalation_execution(&skill),
        expected,
    );
}
/// Both an explicitly empty permission profile and a missing one must fall
/// back to the turn's default execution.
#[test]
fn skill_escalation_execution_ignores_empty_permissions() {
    for permission_profile in [Some(PermissionProfile::default()), None] {
        let skill = test_skill_metadata(permission_profile);
        assert_eq!(
            CoreShellActionProvider::skill_escalation_execution(&skill),
            EscalationExecution::TurnDefault,
        );
    }
}
#[test]
fn evaluate_intercepted_exec_policy_uses_wrapper_command_when_shell_wrapper_parsing_disabled() {
let policy_src = r#"prefix_rule(pattern = ["npm", "publish"], decision = "prompt")"#;

View File

@@ -2,18 +2,13 @@
#![cfg(unix)]
use anyhow::Result;
use codex_protocol::models::FileSystemPermissions;
use codex_protocol::models::PermissionProfile;
use codex_protocol::protocol::AskForApproval;
use codex_protocol::protocol::EventMsg;
use codex_protocol::protocol::ExecApprovalRequestEvent;
use codex_protocol::protocol::ExecApprovalRequestSkillMetadata;
use codex_protocol::protocol::GranularApprovalConfig;
use codex_protocol::protocol::Op;
use codex_protocol::protocol::ReviewDecision;
use codex_protocol::protocol::SandboxPolicy;
use codex_protocol::user_input::UserInput;
use codex_utils_absolute_path::AbsolutePathBuf;
use core_test_support::responses::mount_function_call_agent_response;
use core_test_support::responses::start_mock_server;
use core_test_support::skip_if_no_network;
@@ -23,19 +18,10 @@ use core_test_support::wait_for_event_match;
use core_test_support::zsh_fork::build_zsh_fork_test;
use core_test_support::zsh_fork::restrictive_workspace_write_policy;
use core_test_support::zsh_fork::zsh_fork_runtime;
use pretty_assertions::assert_eq;
use serde_json::json;
use std::fs;
use std::path::Path;
use std::path::PathBuf;
/// Converts `path` into an `AbsolutePathBuf`, panicking with the conversion
/// error if the conversion fails.
fn absolute_path(path: &Path) -> AbsolutePathBuf {
    AbsolutePathBuf::try_from(path).unwrap_or_else(|err| panic!("absolute path: {err}"))
}
fn write_skill_metadata(home: &Path, name: &str, contents: &str) -> Result<()> {
let metadata_dir = home.join("skills").join(name).join("agents");
fs::create_dir_all(&metadata_dir)?;
@@ -44,7 +30,7 @@ fn write_skill_metadata(home: &Path, name: &str, contents: &str) -> Result<()> {
}
fn shell_command_arguments(command: &str) -> Result<String> {
Ok(serde_json::to_string(&json!({
Ok(serde_json::to_string(&serde_json::json!({
"command": command,
"timeout_ms": 500,
}))?)
@@ -78,18 +64,6 @@ async fn submit_turn_with_policies(
Ok(())
}
/// Writes a skill whose script is a fixed `/bin/sh` program that echoes one
/// marker line to stdout and one to stderr, by delegating to
/// `write_skill_with_shell_script_contents`.
fn write_skill_with_shell_script(home: &Path, name: &str, script_name: &str) -> Result<PathBuf> {
    let default_script = r#"#!/bin/sh
echo 'zsh-fork-stdout'
echo 'zsh-fork-stderr' >&2
"#;
    write_skill_with_shell_script_contents(home, name, script_name, default_script)
}
#[cfg(unix)]
fn write_skill_with_shell_script_contents(
home: &Path,
@@ -121,15 +95,13 @@ description: {name} skill
Ok(script_path)
}
fn skill_script_command(test: &TestCodex, script_name: &str) -> Result<(String, String)> {
fn skill_script_command(test: &TestCodex, script_name: &str) -> Result<String> {
let script_path = fs::canonicalize(
test.codex_home_path()
.join("skills/mbolin-test-skill/scripts")
.join(script_name),
)?;
let script_path_str = script_path.to_string_lossy().into_owned();
let command = shlex::try_join([script_path_str.as_str()])?;
Ok((script_path_str, command))
Ok(shlex::try_join([script_path.to_string_lossy().as_ref()])?)
}
async fn wait_for_exec_approval_request(test: &TestCodex) -> Option<ExecApprovalRequestEvent> {
@@ -154,325 +126,12 @@ fn output_shows_sandbox_denial(output: &str) -> bool {
|| output.contains("Read-only file system")
}
/// Focus on the approval payload: the skill should prompt before execution and
/// only advertise the permissions declared in its metadata.
#[cfg(unix)]
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
async fn shell_zsh_fork_prompts_for_skill_script_execution() -> Result<()> {
async fn shell_zsh_fork_skill_scripts_ignore_declared_permissions() -> Result<()> {
skip_if_no_network!(Ok(()));
let Some(runtime) = zsh_fork_runtime("zsh-fork skill prompt test")? else {
return Ok(());
};
let server = start_mock_server().await;
let tool_call_id = "zsh-fork-skill-call";
let test = build_zsh_fork_test(
&server,
runtime,
AskForApproval::OnRequest,
SandboxPolicy::new_workspace_write_policy(),
|home| {
write_skill_with_shell_script(home, "mbolin-test-skill", "hello-mbolin.sh").unwrap();
write_skill_metadata(
home,
"mbolin-test-skill",
r#"
permissions:
file_system:
read:
- "./data"
write:
- "./output"
"#,
)
.unwrap();
},
)
.await?;
let (script_path_str, command) = skill_script_command(&test, "hello-mbolin.sh")?;
let arguments = shell_command_arguments(&command)?;
let mocks =
mount_function_call_agent_response(&server, tool_call_id, &arguments, "shell_command")
.await;
submit_turn_with_policies(
&test,
"use $mbolin-test-skill",
AskForApproval::OnRequest,
SandboxPolicy::new_workspace_write_policy(),
)
.await?;
let maybe_approval = wait_for_exec_approval_request(&test).await;
let approval = match maybe_approval {
Some(approval) => approval,
None => {
let call_output = mocks
.completion
.single_request()
.function_call_output(tool_call_id);
panic!(
"expected exec approval request before completion; function_call_output={call_output:?}"
);
}
};
assert_eq!(approval.call_id, tool_call_id);
assert_eq!(approval.command, vec![script_path_str.clone()]);
assert_eq!(
approval.available_decisions,
Some(vec![
ReviewDecision::Approved,
ReviewDecision::ApprovedForSession,
ReviewDecision::Abort,
])
);
assert_eq!(
approval.additional_permissions,
Some(PermissionProfile {
file_system: Some(FileSystemPermissions {
read: Some(vec![absolute_path(
&test.codex_home_path().join("skills/mbolin-test-skill/data"),
)]),
write: Some(vec![absolute_path(
&test
.codex_home_path()
.join("skills/mbolin-test-skill/output"),
)]),
}),
..Default::default()
})
);
assert_eq!(
approval.skill_metadata,
Some(ExecApprovalRequestSkillMetadata {
path_to_skills_md: test
.codex_home_path()
.join("skills/mbolin-test-skill/agents/openai.yaml"),
})
);
test.codex
.submit(Op::ExecApproval {
id: approval.effective_approval_id(),
turn_id: None,
decision: ReviewDecision::Denied,
})
.await?;
wait_for_turn_complete(&test).await;
let call_output = mocks
.completion
.single_request()
.function_call_output(tool_call_id);
let output = call_output["output"].as_str().unwrap_or_default();
assert!(
output.contains("Execution denied: User denied execution"),
"expected rejection marker in function_call_output: {output:?}"
);
Ok(())
}
/// Runs a skill script that declares a write permission under a granular
/// approval policy, expects an exec approval request, denies it, and checks
/// that the tool output reports the user's denial.
///
/// NOTE(review): the test name says `sandbox_approval_false`, but the policy
/// built below sets `sandbox_approval: true` (every granular flag is `true`).
/// Confirm which of the two is intended.
#[cfg(unix)]
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
async fn shell_zsh_fork_skill_script_reject_policy_with_sandbox_approval_false_still_prompts()
-> Result<()> {
skip_if_no_network!(Ok(()));
// Return early (as a pass) when `zsh_fork_runtime` yields no runtime.
let Some(runtime) = zsh_fork_runtime("zsh-fork reject false skill prompt test")? else {
return Ok(());
};
let approval_policy = AskForApproval::Granular(GranularApprovalConfig {
sandbox_approval: true,
rules: true,
skill_approval: true,
request_permissions: true,
mcp_elicitations: true,
});
let server = start_mock_server().await;
let tool_call_id = "zsh-fork-skill-reject-false";
let test = build_zsh_fork_test(
&server,
runtime,
approval_policy,
SandboxPolicy::new_workspace_write_policy(),
|home| {
// Install the skill script plus metadata that declares a write permission.
write_skill_with_shell_script(home, "mbolin-test-skill", "hello-mbolin.sh").unwrap();
write_skill_metadata(
home,
"mbolin-test-skill",
r#"
permissions:
file_system:
write:
- "./output"
"#,
)
.unwrap();
},
)
.await?;
let (script_path_str, command) = skill_script_command(&test, "hello-mbolin.sh")?;
let arguments = shell_command_arguments(&command)?;
// Have the mock server respond with a `shell_command` call that runs the skill script.
let mocks =
mount_function_call_agent_response(&server, tool_call_id, &arguments, "shell_command")
.await;
submit_turn_with_policies(
&test,
"use $mbolin-test-skill",
approval_policy,
SandboxPolicy::new_workspace_write_policy(),
)
.await?;
let maybe_approval = wait_for_exec_approval_request(&test).await;
let approval = match maybe_approval {
Some(approval) => approval,
None => {
// No prompt arrived: include the tool output in the panic message to aid debugging.
let call_output = mocks
.completion
.single_request()
.function_call_output(tool_call_id);
panic!(
"expected exec approval request before completion; function_call_output={call_output:?}"
);
}
};
assert_eq!(approval.call_id, tool_call_id);
assert_eq!(approval.command, vec![script_path_str]);
// Deny the request and let the turn finish.
test.codex
.submit(Op::ExecApproval {
id: approval.effective_approval_id(),
turn_id: None,
decision: ReviewDecision::Denied,
})
.await?;
wait_for_turn_complete(&test).await;
let call_output = mocks
.completion
.single_request()
.function_call_output(tool_call_id);
let output = call_output["output"].as_str().unwrap_or_default();
assert!(
output.contains("Execution denied: User denied execution"),
"expected rejection marker in function_call_output: {output:?}"
);
Ok(())
}
/// Runs a skill script that declares a write permission under a granular
/// approval policy with `sandbox_approval: false`, expects an exec approval
/// request anyway, denies it, and checks that the tool output reports the
/// user's denial.
///
/// NOTE(review): the test name says `sandbox_approval_true`, but the policy
/// built below sets `sandbox_approval: false`. Confirm which of the two is
/// intended.
#[cfg(unix)]
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
async fn shell_zsh_fork_skill_script_reject_policy_with_sandbox_approval_true_still_prompts()
-> Result<()> {
skip_if_no_network!(Ok(()));
// Return early (as a pass) when `zsh_fork_runtime` yields no runtime.
let Some(runtime) =
zsh_fork_runtime("zsh-fork reject sandbox approval true skill prompt test")?
else {
return Ok(());
};
let approval_policy = AskForApproval::Granular(GranularApprovalConfig {
sandbox_approval: false,
rules: true,
skill_approval: true,
request_permissions: true,
mcp_elicitations: true,
});
let server = start_mock_server().await;
let tool_call_id = "zsh-fork-skill-reject-true";
let test = build_zsh_fork_test(
&server,
runtime,
approval_policy,
SandboxPolicy::new_workspace_write_policy(),
|home| {
// Install the skill script plus metadata that declares a write permission.
write_skill_with_shell_script(home, "mbolin-test-skill", "hello-mbolin.sh").unwrap();
write_skill_metadata(
home,
"mbolin-test-skill",
r#"
permissions:
file_system:
write:
- "./output"
"#,
)
.unwrap();
},
)
.await?;
// Only the shell command is needed here; the script path itself is not asserted on.
let (_, command) = skill_script_command(&test, "hello-mbolin.sh")?;
let arguments = shell_command_arguments(&command)?;
let mocks =
mount_function_call_agent_response(&server, tool_call_id, &arguments, "shell_command")
.await;
submit_turn_with_policies(
&test,
"use $mbolin-test-skill",
approval_policy,
SandboxPolicy::new_workspace_write_policy(),
)
.await?;
let maybe_approval = wait_for_exec_approval_request(&test).await;
let approval = match maybe_approval {
Some(approval) => approval,
None => {
// No prompt arrived: include the tool output in the panic message to aid debugging.
let call_output = mocks
.completion
.single_request()
.function_call_output(tool_call_id);
panic!(
"expected exec approval request before completion; function_call_output={call_output:?}"
);
}
};
assert_eq!(approval.call_id, tool_call_id);
// Deny the request and let the turn finish.
test.codex
.submit(Op::ExecApproval {
id: approval.effective_approval_id(),
turn_id: None,
decision: ReviewDecision::Denied,
})
.await?;
wait_for_turn_complete(&test).await;
let call_output = mocks
.completion
.single_request()
.function_call_output(tool_call_id);
let output = call_output["output"].as_str().unwrap_or_default();
assert!(
output.contains("Execution denied: User denied execution"),
"expected rejection marker in function_call_output: {output:?}"
);
Ok(())
}
#[cfg(unix)]
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
async fn shell_zsh_fork_skill_script_reject_policy_with_skill_approval_true_skips_prompt()
-> Result<()> {
skip_if_no_network!(Ok(()));
let Some(runtime) = zsh_fork_runtime("zsh-fork reject skill approval true skill prompt test")?
else {
let Some(runtime) = zsh_fork_runtime("zsh-fork skill script ignores permissions test")? else {
return Ok(());
};
@@ -483,364 +142,30 @@ async fn shell_zsh_fork_skill_script_reject_policy_with_skill_approval_true_skip
request_permissions: true,
mcp_elicitations: true,
});
let server = start_mock_server().await;
let tool_call_id = "zsh-fork-skill-reject-skill-approval-true";
let test = build_zsh_fork_test(
&server,
runtime,
approval_policy,
SandboxPolicy::new_workspace_write_policy(),
|home| {
write_skill_with_shell_script(home, "mbolin-test-skill", "hello-mbolin.sh").unwrap();
write_skill_metadata(
home,
"mbolin-test-skill",
r#"
permissions:
file_system:
write:
- "./output"
"#,
)
.unwrap();
},
)
.await?;
let (_, command) = skill_script_command(&test, "hello-mbolin.sh")?;
let arguments = shell_command_arguments(&command)?;
let mocks =
mount_function_call_agent_response(&server, tool_call_id, &arguments, "shell_command")
.await;
submit_turn_with_policies(
&test,
"use $mbolin-test-skill",
approval_policy,
SandboxPolicy::new_workspace_write_policy(),
)
.await?;
let approval = wait_for_exec_approval_request(&test).await;
assert!(
approval.is_none(),
"expected reject skill approval policy to skip exec approval"
);
wait_for_turn_complete(&test).await;
let call_output = mocks
.completion
.single_request()
.function_call_output(tool_call_id);
let output = call_output["output"].as_str().unwrap_or_default();
assert!(
output.contains("Execution denied: Execution forbidden by policy"),
"expected policy rejection marker in function_call_output: {output:?}"
);
Ok(())
}
/// Permissionless skills should inherit the turn sandbox without prompting.
///
/// The skill script tries to write the word `forbidden` to a file in a temp
/// directory created under the current directory and then `cat` it. The test
/// runs the script in two consecutive turns and, for each, asserts that no
/// exec approval was requested, that the output either shows a sandbox denial
/// or lacks `forbidden`, and that the target file was never created.
#[cfg(unix)]
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
async fn shell_zsh_fork_skill_without_permissions_inherits_turn_sandbox() -> Result<()> {
skip_if_no_network!(Ok(()));
// Return early (as a pass) when `zsh_fork_runtime` yields no runtime.
let Some(runtime) = zsh_fork_runtime("zsh-fork inherited skill sandbox test")? else {
return Ok(());
};
let outside_dir = tempfile::tempdir_in(std::env::current_dir()?)?;
let outside_path = outside_dir
.path()
.join("zsh-fork-skill-inherited-sandbox.txt");
// Shell-quote the path before splicing it into the script text.
let outside_path_quoted = shlex::try_join([outside_path.to_string_lossy().as_ref()])?;
let script_contents = format!(
"#!/bin/sh\nprintf '%s' forbidden > {outside_path_quoted}\ncat {outside_path_quoted}\n"
);
// Clones handed to the `move` setup closure below.
let outside_path_for_hook = outside_path.clone();
let script_contents_for_hook = script_contents.clone();
let workspace_write_policy = restrictive_workspace_write_policy();
let server = start_mock_server().await;
let test = build_zsh_fork_test(
&server,
runtime,
AskForApproval::OnRequest,
workspace_write_policy.clone(),
move |home| {
// Best-effort cleanup so a stale file cannot affect the existence checks below.
let _ = fs::remove_file(&outside_path_for_hook);
// No skill metadata is written, so the skill declares no permissions.
write_skill_with_shell_script_contents(
home,
"mbolin-test-skill",
"sandboxed.sh",
&script_contents_for_hook,
)
.unwrap();
},
)
.await?;
let (_, command) = skill_script_command(&test, "sandboxed.sh")?;
// First run.
let first_call_id = "zsh-fork-skill-permissions-1";
let first_arguments = shell_command_arguments(&command)?;
let first_mocks = mount_function_call_agent_response(
&server,
first_call_id,
&first_arguments,
"shell_command",
)
.await;
submit_turn_with_policies(
&test,
"use $mbolin-test-skill",
AskForApproval::OnRequest,
workspace_write_policy.clone(),
)
.await?;
let first_approval = wait_for_exec_approval_request(&test).await;
assert!(
first_approval.is_none(),
"expected permissionless skill script to skip exec approval"
);
wait_for_turn_complete(&test).await;
let first_output = first_mocks
.completion
.single_request()
.function_call_output(first_call_id)["output"]
.as_str()
.unwrap_or_default()
.to_string();
assert!(
output_shows_sandbox_denial(&first_output) || !first_output.contains("forbidden"),
"expected inherited turn sandbox denial on first run, got output: {first_output:?}"
);
assert!(
!outside_path.exists(),
"first run should not write outside the turn sandbox"
);
// Second run of the same command: behavior must not change on a rerun.
let second_call_id = "zsh-fork-skill-permissions-2";
let second_arguments = shell_command_arguments(&command)?;
let second_mocks = mount_function_call_agent_response(
&server,
second_call_id,
&second_arguments,
"shell_command",
)
.await;
submit_turn_with_policies(
&test,
"use $mbolin-test-skill",
AskForApproval::OnRequest,
workspace_write_policy,
)
.await?;
// NOTE(review): unlike the first run, there is no explicit
// `wait_for_turn_complete` before reading the output; presumably a `None`
// from `wait_for_exec_approval_request` implies the turn finished — confirm.
let cached_approval = wait_for_exec_approval_request(&test).await;
assert!(
cached_approval.is_none(),
"expected permissionless skill rerun to continue skipping exec approval"
);
let second_output = second_mocks
.completion
.single_request()
.function_call_output(second_call_id)["output"]
.as_str()
.unwrap_or_default()
.to_string();
assert!(
output_shows_sandbox_denial(&second_output) || !second_output.contains("forbidden"),
"expected cached skill approval to retain inherited turn sandboxing, got output: {second_output:?}"
);
assert!(
!outside_path.exists(),
"cached session approval should not widen a permissionless skill to full access"
);
Ok(())
}
/// Empty skill permissions should behave like no skill override and inherit the
/// turn sandbox without prompting.
///
/// The skill metadata is `permissions: {}` and the turn runs with
/// `SandboxPolicy::DangerFullAccess`. The script writes the word `allowed` to a
/// file in a temp directory created under the current directory and then
/// `cat`s it. The test runs the script in two consecutive turns and, for each,
/// asserts that no exec approval was requested, that the output contains
/// `allowed`, and that the file holds `allowed`.
#[cfg(unix)]
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
async fn shell_zsh_fork_skill_with_empty_permissions_inherits_turn_sandbox() -> Result<()> {
skip_if_no_network!(Ok(()));
// Return early (as a pass) when `zsh_fork_runtime` yields no runtime.
let Some(runtime) = zsh_fork_runtime("zsh-fork empty skill permissions test")? else {
return Ok(());
};
let outside_dir = tempfile::tempdir_in(std::env::current_dir()?)?;
let outside_path = outside_dir
.path()
.join("zsh-fork-skill-empty-permissions.txt");
// Shell-quote the path before splicing it into the script text.
let outside_path_quoted = shlex::try_join([outside_path.to_string_lossy().as_ref()])?;
let script_contents = format!(
"#!/bin/sh\nprintf '%s' allowed > {outside_path_quoted}\ncat {outside_path_quoted}\n"
);
// Clones handed to the `move` setup closure below.
let outside_path_for_hook = outside_path.clone();
let script_contents_for_hook = script_contents.clone();
let server = start_mock_server().await;
let test = build_zsh_fork_test(
&server,
runtime,
AskForApproval::OnRequest,
SandboxPolicy::DangerFullAccess,
move |home| {
// Best-effort cleanup so a stale file cannot satisfy the content checks below.
let _ = fs::remove_file(&outside_path_for_hook);
write_skill_with_shell_script_contents(
home,
"mbolin-test-skill",
"sandboxed.sh",
&script_contents_for_hook,
)
.unwrap();
// Metadata is present but declares an empty permissions mapping.
write_skill_metadata(home, "mbolin-test-skill", "permissions: {}\n").unwrap();
},
)
.await?;
let (_, command) = skill_script_command(&test, "sandboxed.sh")?;
// First run.
let first_call_id = "zsh-fork-skill-empty-permissions-1";
let first_arguments = shell_command_arguments(&command)?;
let first_mocks = mount_function_call_agent_response(
&server,
first_call_id,
&first_arguments,
"shell_command",
)
.await;
submit_turn_with_policies(
&test,
"use $mbolin-test-skill",
AskForApproval::OnRequest,
SandboxPolicy::DangerFullAccess,
)
.await?;
let first_approval = wait_for_exec_approval_request(&test).await;
assert!(
first_approval.is_none(),
"expected empty skill permissions to skip exec approval"
);
wait_for_turn_complete(&test).await;
let first_output = first_mocks
.completion
.single_request()
.function_call_output(first_call_id)["output"]
.as_str()
.unwrap_or_default()
.to_string();
assert!(
first_output.contains("allowed"),
"expected empty skill permissions to inherit full-access turn sandbox, got output: {first_output:?}"
);
assert_eq!(fs::read_to_string(&outside_path)?, "allowed");
// Second run of the same command.
let second_call_id = "zsh-fork-skill-empty-permissions-2";
let second_arguments = shell_command_arguments(&command)?;
let second_mocks = mount_function_call_agent_response(
&server,
second_call_id,
&second_arguments,
"shell_command",
)
.await;
// Remove the first run's file so the final read proves the second run wrote it.
let _ = fs::remove_file(&outside_path);
submit_turn_with_policies(
&test,
"use $mbolin-test-skill",
AskForApproval::OnRequest,
SandboxPolicy::DangerFullAccess,
)
.await?;
// NOTE(review): unlike the first run, there is no explicit
// `wait_for_turn_complete` before reading the output; presumably a `None`
// from `wait_for_exec_approval_request` implies the turn finished — confirm.
let cached_approval = wait_for_exec_approval_request(&test).await;
assert!(
cached_approval.is_none(),
"expected empty-permissions skill rerun to continue skipping exec approval"
);
let second_output = second_mocks
.completion
.single_request()
.function_call_output(second_call_id)["output"]
.as_str()
.unwrap_or_default()
.to_string();
assert!(
second_output.contains("allowed"),
"expected cached empty-permissions skill approval to inherit the turn sandbox, got output: {second_output:?}"
);
assert_eq!(fs::read_to_string(&outside_path)?, "allowed");
Ok(())
}
/// The validation to focus on is: writes to the skill-approved folder succeed,
/// and writes to an unrelated folder fail, both before and after cached approval.
#[cfg(unix)]
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
async fn shell_zsh_fork_skill_session_approval_enforces_skill_permissions() -> Result<()> {
skip_if_no_network!(Ok(()));
let Some(runtime) = zsh_fork_runtime("zsh-fork explicit skill sandbox test")? else {
return Ok(());
};
let outside_dir = tempfile::tempdir_in(std::env::current_dir()?)?;
let allowed_dir = outside_dir.path().join("allowed-output");
let blocked_dir = outside_dir.path().join("blocked-output");
fs::create_dir_all(&allowed_dir)?;
fs::create_dir_all(&blocked_dir)?;
let allowed_path = allowed_dir.join("allowed.txt");
let blocked_path = blocked_dir.join("blocked.txt");
let allowed_path_quoted = shlex::try_join([allowed_path.to_string_lossy().as_ref()])?;
let blocked_path_quoted = shlex::try_join([blocked_path.to_string_lossy().as_ref()])?;
let script_contents = format!(
"#!/bin/sh\nprintf '%s' allowed > {allowed_path_quoted}\ncat {allowed_path_quoted}\nprintf '%s' forbidden > {blocked_path_quoted}\nif [ -f {blocked_path_quoted} ]; then echo blocked-created; fi\n"
"#!/bin/sh\nprintf '%s' allowed > {allowed_path_quoted}\nif [ -f {allowed_path_quoted} ]; then cat {allowed_path_quoted}; fi\n"
);
let allowed_dir_for_hook = allowed_dir.clone();
let allowed_path_for_hook = allowed_path.clone();
let blocked_path_for_hook = blocked_path.clone();
let script_contents_for_hook = script_contents.clone();
let permissions_yaml = format!(
"permissions:\n file_system:\n write:\n - \"{}\"\n",
allowed_dir.display()
);
let workspace_write_policy = restrictive_workspace_write_policy();
let server = start_mock_server().await;
let allowed_path_for_hook = allowed_path.clone();
let script_contents_for_hook = script_contents.clone();
let test = build_zsh_fork_test(
&server,
runtime,
AskForApproval::OnRequest,
approval_policy,
workspace_write_policy.clone(),
move |home| {
let _ = fs::remove_file(&allowed_path_for_hook);
let _ = fs::remove_file(&blocked_path_for_hook);
fs::create_dir_all(&allowed_dir_for_hook).unwrap();
fs::create_dir_all(blocked_path_for_hook.parent().unwrap()).unwrap();
write_skill_with_shell_script_contents(
home,
"mbolin-test-skill",
@@ -853,128 +178,49 @@ async fn shell_zsh_fork_skill_session_approval_enforces_skill_permissions() -> R
)
.await?;
let (script_path_str, command) = skill_script_command(&test, "sandboxed.sh")?;
let first_call_id = "zsh-fork-skill-permissions-1";
let first_arguments = shell_command_arguments(&command)?;
let first_mocks = mount_function_call_agent_response(
&server,
first_call_id,
&first_arguments,
"shell_command",
)
.await;
let command = skill_script_command(&test, "sandboxed.sh")?;
let call_id = "zsh-fork-skill-script-ignores-permissions";
let arguments = shell_command_arguments(&command)?;
let mocks =
mount_function_call_agent_response(&server, call_id, &arguments, "shell_command").await;
submit_turn_with_policies(
&test,
"use $mbolin-test-skill",
AskForApproval::OnRequest,
workspace_write_policy.clone(),
)
.await?;
let maybe_approval = wait_for_exec_approval_request(&test).await;
let approval = match maybe_approval {
Some(approval) => approval,
None => panic!("expected exec approval request before completion"),
};
assert_eq!(approval.call_id, first_call_id);
assert_eq!(approval.command, vec![script_path_str.clone()]);
assert_eq!(
approval.additional_permissions,
Some(PermissionProfile {
file_system: Some(FileSystemPermissions {
read: None,
write: Some(vec![absolute_path(&allowed_dir)]),
}),
..Default::default()
})
);
test.codex
.submit(Op::ExecApproval {
id: approval.effective_approval_id(),
turn_id: None,
decision: ReviewDecision::ApprovedForSession,
})
.await?;
wait_for_turn_complete(&test).await;
let first_output = first_mocks
.completion
.single_request()
.function_call_output(first_call_id)["output"]
.as_str()
.unwrap_or_default()
.to_string();
assert!(
first_output.contains("allowed"),
"expected skill sandbox to permit writes to the approved folder, got output: {first_output:?}"
);
assert_eq!(fs::read_to_string(&allowed_path)?, "allowed");
assert!(
!blocked_path.exists(),
"first run should not write outside the explicit skill sandbox"
);
assert!(
!first_output.contains("blocked-created"),
"blocked path should not have been created: {first_output:?}"
);
let second_call_id = "zsh-fork-skill-permissions-2";
let second_arguments = shell_command_arguments(&command)?;
let second_mocks = mount_function_call_agent_response(
&server,
second_call_id,
&second_arguments,
"shell_command",
)
.await;
let _ = fs::remove_file(&allowed_path);
let _ = fs::remove_file(&blocked_path);
submit_turn_with_policies(
&test,
"use $mbolin-test-skill",
AskForApproval::OnRequest,
approval_policy,
workspace_write_policy,
)
.await?;
let cached_approval = wait_for_exec_approval_request(&test).await;
let approval = wait_for_exec_approval_request(&test).await;
assert!(
cached_approval.is_none(),
"expected second run to reuse the cached session approval"
approval.is_none(),
"expected skill script execution to skip the removed skill approval path"
);
let second_output = second_mocks
wait_for_turn_complete(&test).await;
let call_output = mocks
.completion
.single_request()
.function_call_output(second_call_id)["output"]
.as_str()
.unwrap_or_default()
.to_string();
.function_call_output(call_id);
let output = call_output["output"].as_str().unwrap_or_default();
assert!(
second_output.contains("allowed"),
"expected cached skill approval to retain the explicit skill sandbox, got output: {second_output:?}"
);
assert_eq!(fs::read_to_string(&allowed_path)?, "allowed");
assert!(
!blocked_path.exists(),
"cached session approval should not widen skill execution beyond the explicit skill sandbox"
!output.contains("Execution denied: Execution forbidden by policy"),
"skill script should now be governed by the turn sandbox, not the removed skill approval gate: {output:?}"
);
assert!(
!second_output.contains("blocked-created"),
"blocked path should not have been created after cached approval: {second_output:?}"
output_shows_sandbox_denial(output) || !output.contains("allowed"),
"expected the turn sandbox to block the out-of-workspace write, got output: {output:?}"
);
assert!(
!allowed_path.exists(),
"declared skill permissions should not widen script execution beyond the turn sandbox"
);
Ok(())
}
/// This stays narrow on purpose: the important check is that `WorkspaceWrite`
/// continues to deny writes outside the workspace even under `zsh-fork`.
#[cfg(unix)]
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
async fn shell_zsh_fork_still_enforces_workspace_write_sandbox() -> Result<()> {