mirror of
https://github.com/openai/codex.git
synced 2026-04-27 09:51:03 +03:00
core: remove special execve handling for skill scripts (#15812)
This commit is contained in:
@@ -1870,7 +1870,6 @@ impl Session {
|
||||
session_telemetry,
|
||||
models_manager: Arc::clone(&models_manager),
|
||||
tool_approvals: Mutex::new(ApprovalStore::default()),
|
||||
execve_session_approvals: RwLock::new(HashMap::new()),
|
||||
skills_manager,
|
||||
plugins_manager: Arc::clone(&plugins_manager),
|
||||
mcp_manager: Arc::clone(&mcp_manager),
|
||||
|
||||
@@ -2683,7 +2683,6 @@ pub(crate) async fn make_session_and_context() -> (Session, TurnContext) {
|
||||
session_telemetry: session_telemetry.clone(),
|
||||
models_manager: Arc::clone(&models_manager),
|
||||
tool_approvals: Mutex::new(ApprovalStore::default()),
|
||||
execve_session_approvals: RwLock::new(HashMap::new()),
|
||||
skills_manager,
|
||||
plugins_manager,
|
||||
mcp_manager,
|
||||
@@ -3521,7 +3520,6 @@ pub(crate) async fn make_session_and_context_with_dynamic_tools_and_rx(
|
||||
session_telemetry: session_telemetry.clone(),
|
||||
models_manager: Arc::clone(&models_manager),
|
||||
tool_approvals: Mutex::new(ApprovalStore::default()),
|
||||
execve_session_approvals: RwLock::new(HashMap::new()),
|
||||
skills_manager,
|
||||
plugins_manager,
|
||||
mcp_manager,
|
||||
|
||||
@@ -1,4 +1,3 @@
|
||||
use std::collections::HashMap;
|
||||
use std::sync::Arc;
|
||||
|
||||
use crate::AuthManager;
|
||||
@@ -16,14 +15,12 @@ use crate::skills_watcher::SkillsWatcher;
|
||||
use crate::state_db::StateDbHandle;
|
||||
use crate::tools::code_mode::CodeModeService;
|
||||
use crate::tools::network_approval::NetworkApprovalService;
|
||||
use crate::tools::runtimes::ExecveSessionApproval;
|
||||
use crate::tools::sandboxing::ApprovalStore;
|
||||
use crate::unified_exec::UnifiedExecProcessManager;
|
||||
use codex_analytics::AnalyticsEventsClient;
|
||||
use codex_exec_server::Environment;
|
||||
use codex_hooks::Hooks;
|
||||
use codex_otel::SessionTelemetry;
|
||||
use codex_utils_absolute_path::AbsolutePathBuf;
|
||||
use std::path::PathBuf;
|
||||
use tokio::sync::Mutex;
|
||||
use tokio::sync::RwLock;
|
||||
@@ -49,8 +46,6 @@ pub(crate) struct SessionServices {
|
||||
pub(crate) models_manager: Arc<ModelsManager>,
|
||||
pub(crate) session_telemetry: SessionTelemetry,
|
||||
pub(crate) tool_approvals: Mutex<ApprovalStore>,
|
||||
#[cfg_attr(not(unix), allow(dead_code))]
|
||||
pub(crate) execve_session_approvals: RwLock<HashMap<AbsolutePathBuf, ExecveSessionApproval>>,
|
||||
pub(crate) skills_manager: Arc<SkillsManager>,
|
||||
pub(crate) plugins_manager: Arc<PluginsManager>,
|
||||
pub(crate) mcp_manager: Arc<McpManager>,
|
||||
|
||||
@@ -4,7 +4,6 @@ Module: runtimes
|
||||
Concrete ToolRuntime implementations for specific tools. Each runtime stays
|
||||
small and focused and reuses the orchestrator for approvals + sandbox + retry.
|
||||
*/
|
||||
use crate::SkillMetadata;
|
||||
use crate::path_utils;
|
||||
use crate::shell::Shell;
|
||||
use crate::tools::sandboxing::ToolError;
|
||||
@@ -17,14 +16,6 @@ pub mod apply_patch;
|
||||
pub mod shell;
|
||||
pub mod unified_exec;
|
||||
|
||||
/// A session-scoped approval recorded for an intercepted execve(2) program.
///
/// Values of this type are stored in `SessionServices::execve_session_approvals`,
/// keyed by the absolute path of the approved program.
#[derive(Debug, Clone)]
|
||||
pub(crate) struct ExecveSessionApproval {
|
||||
/// If this execve session approval is associated with a skill script, this
|
||||
/// field contains metadata about the skill.
|
||||
// NOTE(review): the lint is relaxed on non-unix targets, presumably because
// the field is only read by unix-only code paths; confirm.
#[cfg_attr(not(unix), allow(dead_code))]
|
||||
pub skill: Option<SkillMetadata>,
|
||||
}
|
||||
|
||||
/// Shared helper to construct sandbox transform inputs from a tokenized command line.
|
||||
/// Validates that at least a program is present.
|
||||
pub(crate) fn build_sandbox_command(
|
||||
|
||||
@@ -1,5 +1,4 @@
|
||||
use super::ShellRequest;
|
||||
use crate::SkillMetadata;
|
||||
use crate::error::CodexErr;
|
||||
use crate::error::SandboxErr;
|
||||
use crate::exec::ExecCapturePolicy;
|
||||
@@ -13,8 +12,6 @@ use crate::sandboxing::ExecOptions;
|
||||
use crate::sandboxing::ExecRequest;
|
||||
use crate::sandboxing::SandboxPermissions;
|
||||
use crate::shell::ShellType;
|
||||
use crate::skills_load_input_from_config;
|
||||
use crate::tools::runtimes::ExecveSessionApproval;
|
||||
use crate::tools::runtimes::build_sandbox_command;
|
||||
use crate::tools::sandboxing::SandboxAttempt;
|
||||
use crate::tools::sandboxing::ToolCtx;
|
||||
@@ -31,7 +28,6 @@ use codex_protocol::models::PermissionProfile;
|
||||
use codex_protocol::permissions::FileSystemSandboxPolicy;
|
||||
use codex_protocol::permissions::NetworkSandboxPolicy;
|
||||
use codex_protocol::protocol::AskForApproval;
|
||||
use codex_protocol::protocol::ExecApprovalRequestSkillMetadata;
|
||||
use codex_protocol::protocol::NetworkPolicyRuleAction;
|
||||
use codex_protocol::protocol::ReviewDecision;
|
||||
use codex_protocol::protocol::SandboxPolicy;
|
||||
@@ -74,9 +70,6 @@ const REJECT_SANDBOX_APPROVAL_REASON: &str =
|
||||
"approval required by policy, but AskForApproval::Granular.sandbox_approval is false";
|
||||
const REJECT_RULES_APPROVAL_REASON: &str =
|
||||
"approval required by policy rule, but AskForApproval::Granular.rules is false";
|
||||
const REJECT_SKILL_APPROVAL_REASON: &str =
|
||||
"approval required by skill, but AskForApproval::Granular.skill_approval is false";
|
||||
|
||||
fn approval_sandbox_permissions(
|
||||
sandbox_permissions: SandboxPermissions,
|
||||
additional_permissions_preapproved: bool,
|
||||
@@ -327,9 +320,6 @@ struct CoreShellActionProvider {
|
||||
|
||||
#[allow(clippy::large_enum_variant)]
|
||||
enum DecisionSource {
|
||||
SkillScript {
|
||||
skill: SkillMetadata,
|
||||
},
|
||||
PrefixRule,
|
||||
/// Often, this is `is_safe_command()`.
|
||||
UnmatchedCommandFallback,
|
||||
@@ -341,11 +331,6 @@ fn execve_prompt_is_rejected_by_policy(
|
||||
) -> Option<&'static str> {
|
||||
match (approval_policy, decision_source) {
|
||||
(AskForApproval::Never, _) => Some(PROMPT_CONFLICT_REASON),
|
||||
(AskForApproval::Granular(granular_config), DecisionSource::SkillScript { .. })
|
||||
if !granular_config.allows_skill_approval() =>
|
||||
{
|
||||
Some(REJECT_SKILL_APPROVAL_REASON)
|
||||
}
|
||||
(AskForApproval::Granular(granular_config), DecisionSource::PrefixRule)
|
||||
if !granular_config.allows_rules_approval() =>
|
||||
{
|
||||
@@ -397,17 +382,6 @@ impl CoreShellActionProvider {
|
||||
}
|
||||
}
|
||||
|
||||
fn skill_escalation_execution(skill: &SkillMetadata) -> EscalationExecution {
|
||||
let permission_profile = skill.permission_profile.clone().unwrap_or_default();
|
||||
if permission_profile.is_empty() {
|
||||
EscalationExecution::TurnDefault
|
||||
} else {
|
||||
EscalationExecution::Permissions(EscalationPermissions::PermissionProfile(
|
||||
permission_profile,
|
||||
))
|
||||
}
|
||||
}
|
||||
|
||||
async fn prompt(
|
||||
&self,
|
||||
program: &AbsolutePathBuf,
|
||||
@@ -415,7 +389,6 @@ impl CoreShellActionProvider {
|
||||
workdir: &AbsolutePathBuf,
|
||||
stopwatch: &Stopwatch,
|
||||
additional_permissions: Option<PermissionProfile>,
|
||||
decision_source: &DecisionSource,
|
||||
) -> anyhow::Result<ReviewDecision> {
|
||||
let command = join_program_and_argv(program, argv);
|
||||
let workdir = workdir.to_path_buf();
|
||||
@@ -442,28 +415,6 @@ impl CoreShellActionProvider {
|
||||
)
|
||||
.await;
|
||||
}
|
||||
let available_decisions = vec![
|
||||
Some(ReviewDecision::Approved),
|
||||
// Currently, ApprovedForSession is only honored for skills,
|
||||
// so only offer it for skill script approvals.
|
||||
if matches!(decision_source, DecisionSource::SkillScript { .. }) {
|
||||
Some(ReviewDecision::ApprovedForSession)
|
||||
} else {
|
||||
None
|
||||
},
|
||||
Some(ReviewDecision::Abort),
|
||||
]
|
||||
.into_iter()
|
||||
.flatten()
|
||||
.collect();
|
||||
let skill_metadata = match decision_source {
|
||||
DecisionSource::SkillScript { skill } => {
|
||||
Some(ExecApprovalRequestSkillMetadata {
|
||||
path_to_skills_md: skill.path_to_skills_md.clone(),
|
||||
})
|
||||
}
|
||||
DecisionSource::PrefixRule | DecisionSource::UnmatchedCommandFallback => None,
|
||||
};
|
||||
session
|
||||
.request_command_approval(
|
||||
&turn,
|
||||
@@ -475,48 +426,14 @@ impl CoreShellActionProvider {
|
||||
/*network_approval_context*/ None,
|
||||
/*proposed_execpolicy_amendment*/ None,
|
||||
additional_permissions,
|
||||
skill_metadata,
|
||||
Some(available_decisions),
|
||||
/*skill_metadata*/ None,
|
||||
Some(vec![ReviewDecision::Approved, ReviewDecision::Abort]),
|
||||
)
|
||||
.await
|
||||
})
|
||||
.await)
|
||||
}
|
||||
|
||||
/// Because we should be intercepting execve(2) calls, `program` should be
|
||||
/// an absolute path. The idea is that we check to see whether it matches
|
||||
/// any skills.
|
||||
async fn find_skill(&self, program: &AbsolutePathBuf) -> Option<SkillMetadata> {
|
||||
let force_reload = false;
|
||||
let turn_config = self.turn.config.as_ref();
|
||||
let plugin_outcome = self
|
||||
.session
|
||||
.services
|
||||
.plugins_manager
|
||||
.plugins_for_config(turn_config);
|
||||
let effective_skill_roots = plugin_outcome.effective_skill_roots();
|
||||
let skills_input = skills_load_input_from_config(turn_config, effective_skill_roots);
|
||||
let skills_outcome = self
|
||||
.session
|
||||
.services
|
||||
.skills_manager
|
||||
.skills_for_cwd(&skills_input, force_reload)
|
||||
.await;
|
||||
|
||||
let program_path = program.as_path();
|
||||
for skill in skills_outcome.skills {
|
||||
// We intentionally ignore "enabled" status here for now.
|
||||
let Some(skill_root) = skill.path_to_skills_md.parent() else {
|
||||
continue;
|
||||
};
|
||||
if program_path.starts_with(skill_root.join("scripts")) {
|
||||
return Some(skill);
|
||||
}
|
||||
}
|
||||
|
||||
None
|
||||
}
|
||||
|
||||
#[allow(clippy::too_many_arguments)]
|
||||
async fn process_decision(
|
||||
&self,
|
||||
@@ -540,17 +457,11 @@ impl CoreShellActionProvider {
|
||||
EscalationDecision::deny(Some("Execution forbidden by policy".to_string()))
|
||||
} else {
|
||||
match self
|
||||
.prompt(
|
||||
program,
|
||||
argv,
|
||||
workdir,
|
||||
&self.stopwatch,
|
||||
prompt_permissions,
|
||||
&decision_source,
|
||||
)
|
||||
.prompt(program, argv, workdir, &self.stopwatch, prompt_permissions)
|
||||
.await?
|
||||
{
|
||||
ReviewDecision::Approved
|
||||
| ReviewDecision::ApprovedForSession
|
||||
| ReviewDecision::ApprovedExecpolicyAmendment { .. } => {
|
||||
if needs_escalation {
|
||||
EscalationDecision::escalate(escalation_execution.clone())
|
||||
@@ -558,33 +469,6 @@ impl CoreShellActionProvider {
|
||||
EscalationDecision::run()
|
||||
}
|
||||
}
|
||||
ReviewDecision::ApprovedForSession => {
|
||||
// Currently, we only add session approvals for
|
||||
// skill scripts because we are storing only the
|
||||
// `program` whereas prefix rules may be restricted by a longer prefix.
|
||||
if let DecisionSource::SkillScript { skill } = decision_source {
|
||||
tracing::debug!(
|
||||
"Adding session approval for {program:?} due to user approval of skill script {skill:?}"
|
||||
);
|
||||
self.session
|
||||
.services
|
||||
.execve_session_approvals
|
||||
.write()
|
||||
.await
|
||||
.insert(
|
||||
program.clone(),
|
||||
ExecveSessionApproval {
|
||||
skill: Some(skill.clone()),
|
||||
},
|
||||
);
|
||||
}
|
||||
|
||||
if needs_escalation {
|
||||
EscalationDecision::escalate(escalation_execution.clone())
|
||||
} else {
|
||||
EscalationDecision::run()
|
||||
}
|
||||
}
|
||||
ReviewDecision::NetworkPolicyAmendment {
|
||||
network_policy_amendment,
|
||||
} => match network_policy_amendment.action {
|
||||
@@ -641,69 +525,6 @@ impl EscalationPolicy for CoreShellActionProvider {
|
||||
"Determining escalation action for command {program:?} with args {argv:?} in {workdir:?}"
|
||||
);
|
||||
|
||||
// Check to see whether `program` has an existing entry in
|
||||
// `execve_session_approvals`. If so, we can skip policy checks and user
|
||||
// prompts and go straight to allowing execution.
|
||||
let approval = {
|
||||
self.session
|
||||
.services
|
||||
.execve_session_approvals
|
||||
.read()
|
||||
.await
|
||||
.get(program)
|
||||
.cloned()
|
||||
};
|
||||
if let Some(approval) = approval {
|
||||
tracing::debug!(
|
||||
"Found session approval for {program:?}, allowing execution without further checks"
|
||||
);
|
||||
let execution = approval
|
||||
.skill
|
||||
.as_ref()
|
||||
.map(Self::skill_escalation_execution)
|
||||
.unwrap_or(EscalationExecution::TurnDefault);
|
||||
|
||||
return Ok(EscalationDecision::escalate(execution));
|
||||
}
|
||||
|
||||
// In the usual case, the execve wrapper reports the command being
|
||||
// executed in `program`, so a direct skill lookup is sufficient.
|
||||
if let Some(skill) = self.find_skill(program).await {
|
||||
// For now, scripts that look like they belong to skills bypass
|
||||
// general exec policy evaluation. Permissionless skills inherit the
|
||||
// turn sandbox directly; skills with declared permissions still
|
||||
// prompt here before applying their permission profile.
|
||||
let prompt_permissions = skill.permission_profile.clone();
|
||||
if prompt_permissions
|
||||
.as_ref()
|
||||
.is_none_or(PermissionProfile::is_empty)
|
||||
{
|
||||
tracing::debug!(
|
||||
"Matched {program:?} to permissionless skill {skill:?}, inheriting turn sandbox"
|
||||
);
|
||||
return Ok(EscalationDecision::escalate(
|
||||
EscalationExecution::TurnDefault,
|
||||
));
|
||||
}
|
||||
tracing::debug!("Matched {program:?} to skill {skill:?}, prompting for approval");
|
||||
let needs_escalation = true;
|
||||
let decision_source = DecisionSource::SkillScript {
|
||||
skill: skill.clone(),
|
||||
};
|
||||
return self
|
||||
.process_decision(
|
||||
Decision::Prompt,
|
||||
needs_escalation,
|
||||
program,
|
||||
argv,
|
||||
workdir,
|
||||
prompt_permissions,
|
||||
Self::skill_escalation_execution(&skill),
|
||||
decision_source,
|
||||
)
|
||||
.await;
|
||||
}
|
||||
|
||||
let evaluation = {
|
||||
let policy = self.policy.read().await;
|
||||
evaluate_intercepted_exec_policy(
|
||||
@@ -746,7 +567,6 @@ impl EscalationPolicy for CoreShellActionProvider {
|
||||
.macos_seatbelt_profile_extensions
|
||||
.as_ref(),
|
||||
),
|
||||
DecisionSource::SkillScript { .. } => unreachable!("handled above"),
|
||||
};
|
||||
self.process_decision(
|
||||
evaluation.decision,
|
||||
|
||||
@@ -8,7 +8,6 @@ use super::evaluate_intercepted_exec_policy;
|
||||
use super::extract_shell_script;
|
||||
use super::join_program_and_argv;
|
||||
use super::map_exec_result;
|
||||
use crate::SkillMetadata;
|
||||
#[cfg(target_os = "macos")]
|
||||
use crate::config::Constrained;
|
||||
#[cfg(target_os = "macos")]
|
||||
@@ -36,7 +35,6 @@ use codex_protocol::permissions::FileSystemSandboxEntry;
|
||||
use codex_protocol::permissions::FileSystemSandboxPolicy;
|
||||
use codex_protocol::permissions::FileSystemSpecialPath;
|
||||
use codex_protocol::permissions::NetworkSandboxPolicy;
|
||||
use codex_protocol::protocol::SkillScope;
|
||||
use codex_sandboxing::SandboxType;
|
||||
#[cfg(target_os = "macos")]
|
||||
use codex_sandboxing::seatbelt::MACOS_PATH_TO_SEATBELT_EXECUTABLE;
|
||||
@@ -83,55 +81,6 @@ fn unrestricted_file_system_sandbox_policy() -> FileSystemSandboxPolicy {
|
||||
FileSystemSandboxPolicy::unrestricted()
|
||||
}
|
||||
|
||||
fn test_skill_metadata(permission_profile: Option<PermissionProfile>) -> SkillMetadata {
|
||||
SkillMetadata {
|
||||
name: "skill".to_string(),
|
||||
description: "description".to_string(),
|
||||
short_description: None,
|
||||
interface: None,
|
||||
dependencies: None,
|
||||
policy: None,
|
||||
permission_profile,
|
||||
managed_network_override: None,
|
||||
path_to_skills_md: PathBuf::from("/tmp/skill/SKILL.md"),
|
||||
scope: SkillScope::User,
|
||||
}
|
||||
}
|
||||
|
||||
/// For a skill-script decision source, only the granular `skill_approval`
/// flag determines whether the execve prompt is rejected by policy.
#[test]
fn execve_prompt_rejection_uses_skill_approval_for_skill_scripts() {
    // Granular policy with every flag enabled except the one under test.
    let granular_policy = |skill_approval: bool| {
        AskForApproval::Granular(GranularApprovalConfig {
            sandbox_approval: true,
            rules: true,
            skill_approval,
            request_permissions: true,
            mcp_elicitations: true,
        })
    };
    let skill_source = super::DecisionSource::SkillScript {
        skill: test_skill_metadata(None),
    };

    // Skill approval allowed: the prompt is not rejected.
    let when_allowed =
        super::execve_prompt_is_rejected_by_policy(granular_policy(true), &skill_source);
    assert_eq!(when_allowed, None);

    // Skill approval disallowed: the prompt is rejected with the skill-specific reason.
    let when_disallowed =
        super::execve_prompt_is_rejected_by_policy(granular_policy(false), &skill_source);
    assert_eq!(
        when_disallowed,
        Some("approval required by skill, but AskForApproval::Granular.skill_approval is false"),
    );
}
|
||||
|
||||
#[test]
|
||||
fn execve_prompt_rejection_keeps_prefix_rules_on_rules_flag() {
|
||||
assert_eq!(
|
||||
@@ -392,42 +341,6 @@ fn shell_request_escalation_execution_is_explicit() {
|
||||
);
|
||||
}
|
||||
|
||||
/// A skill that declares a non-empty permission profile must be executed with
/// exactly that profile.
#[test]
fn skill_escalation_execution_uses_additional_permissions() {
    let writable_root = AbsolutePathBuf::from_absolute_path("/tmp/output").unwrap();
    let declared_profile = PermissionProfile {
        file_system: Some(FileSystemPermissions {
            read: None,
            write: Some(vec![writable_root]),
        }),
        ..Default::default()
    };

    let skill = test_skill_metadata(Some(declared_profile.clone()));
    let expected = EscalationExecution::Permissions(EscalationPermissions::PermissionProfile(
        declared_profile,
    ));

    assert_eq!(
        CoreShellActionProvider::skill_escalation_execution(&skill),
        expected,
    );
}
|
||||
|
||||
/// Both an empty declared profile and no declared profile fall back to the
/// turn's default execution.
#[test]
fn skill_escalation_execution_ignores_empty_permissions() {
    let skill_with_empty_profile = test_skill_metadata(Some(PermissionProfile::default()));
    assert_eq!(
        CoreShellActionProvider::skill_escalation_execution(&skill_with_empty_profile),
        EscalationExecution::TurnDefault,
    );

    let skill_without_profile = test_skill_metadata(None);
    assert_eq!(
        CoreShellActionProvider::skill_escalation_execution(&skill_without_profile),
        EscalationExecution::TurnDefault,
    );
}
|
||||
|
||||
#[test]
|
||||
fn evaluate_intercepted_exec_policy_uses_wrapper_command_when_shell_wrapper_parsing_disabled() {
|
||||
let policy_src = r#"prefix_rule(pattern = ["npm", "publish"], decision = "prompt")"#;
|
||||
|
||||
@@ -2,18 +2,13 @@
|
||||
#![cfg(unix)]
|
||||
|
||||
use anyhow::Result;
|
||||
use codex_protocol::models::FileSystemPermissions;
|
||||
use codex_protocol::models::PermissionProfile;
|
||||
use codex_protocol::protocol::AskForApproval;
|
||||
use codex_protocol::protocol::EventMsg;
|
||||
use codex_protocol::protocol::ExecApprovalRequestEvent;
|
||||
use codex_protocol::protocol::ExecApprovalRequestSkillMetadata;
|
||||
use codex_protocol::protocol::GranularApprovalConfig;
|
||||
use codex_protocol::protocol::Op;
|
||||
use codex_protocol::protocol::ReviewDecision;
|
||||
use codex_protocol::protocol::SandboxPolicy;
|
||||
use codex_protocol::user_input::UserInput;
|
||||
use codex_utils_absolute_path::AbsolutePathBuf;
|
||||
use core_test_support::responses::mount_function_call_agent_response;
|
||||
use core_test_support::responses::start_mock_server;
|
||||
use core_test_support::skip_if_no_network;
|
||||
@@ -23,19 +18,10 @@ use core_test_support::wait_for_event_match;
|
||||
use core_test_support::zsh_fork::build_zsh_fork_test;
|
||||
use core_test_support::zsh_fork::restrictive_workspace_write_policy;
|
||||
use core_test_support::zsh_fork::zsh_fork_runtime;
|
||||
use pretty_assertions::assert_eq;
|
||||
use serde_json::json;
|
||||
use std::fs;
|
||||
use std::path::Path;
|
||||
use std::path::PathBuf;
|
||||
|
||||
fn absolute_path(path: &Path) -> AbsolutePathBuf {
|
||||
match AbsolutePathBuf::try_from(path) {
|
||||
Ok(path) => path,
|
||||
Err(err) => panic!("absolute path: {err}"),
|
||||
}
|
||||
}
|
||||
|
||||
fn write_skill_metadata(home: &Path, name: &str, contents: &str) -> Result<()> {
|
||||
let metadata_dir = home.join("skills").join(name).join("agents");
|
||||
fs::create_dir_all(&metadata_dir)?;
|
||||
@@ -44,7 +30,7 @@ fn write_skill_metadata(home: &Path, name: &str, contents: &str) -> Result<()> {
|
||||
}
|
||||
|
||||
fn shell_command_arguments(command: &str) -> Result<String> {
|
||||
Ok(serde_json::to_string(&json!({
|
||||
Ok(serde_json::to_string(&serde_json::json!({
|
||||
"command": command,
|
||||
"timeout_ms": 500,
|
||||
}))?)
|
||||
@@ -78,18 +64,6 @@ async fn submit_turn_with_policies(
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// Writes a skill named `name` under `home` with a fixed `/bin/sh` script
/// called `script_name`.
///
/// This is a thin wrapper that forwards to
/// `write_skill_with_shell_script_contents` with a canned script body that
/// echoes `zsh-fork-stdout` to stdout and `zsh-fork-stderr` to stderr, and
/// returns that helper's result unchanged.
fn write_skill_with_shell_script(home: &Path, name: &str, script_name: &str) -> Result<PathBuf> {
|
||||
write_skill_with_shell_script_contents(
|
||||
home,
|
||||
name,
|
||||
script_name,
|
||||
r#"#!/bin/sh
|
||||
echo 'zsh-fork-stdout'
|
||||
echo 'zsh-fork-stderr' >&2
|
||||
"#,
|
||||
)
|
||||
}
|
||||
|
||||
#[cfg(unix)]
|
||||
fn write_skill_with_shell_script_contents(
|
||||
home: &Path,
|
||||
@@ -121,15 +95,13 @@ description: {name} skill
|
||||
Ok(script_path)
|
||||
}
|
||||
|
||||
fn skill_script_command(test: &TestCodex, script_name: &str) -> Result<(String, String)> {
|
||||
fn skill_script_command(test: &TestCodex, script_name: &str) -> Result<String> {
|
||||
let script_path = fs::canonicalize(
|
||||
test.codex_home_path()
|
||||
.join("skills/mbolin-test-skill/scripts")
|
||||
.join(script_name),
|
||||
)?;
|
||||
let script_path_str = script_path.to_string_lossy().into_owned();
|
||||
let command = shlex::try_join([script_path_str.as_str()])?;
|
||||
Ok((script_path_str, command))
|
||||
Ok(shlex::try_join([script_path.to_string_lossy().as_ref()])?)
|
||||
}
|
||||
|
||||
async fn wait_for_exec_approval_request(test: &TestCodex) -> Option<ExecApprovalRequestEvent> {
|
||||
@@ -154,325 +126,12 @@ fn output_shows_sandbox_denial(output: &str) -> bool {
|
||||
|| output.contains("Read-only file system")
|
||||
}
|
||||
|
||||
/// Focus on the approval payload: the skill should prompt before execution and
|
||||
/// only advertise the permissions declared in its metadata.
|
||||
#[cfg(unix)]
|
||||
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
|
||||
async fn shell_zsh_fork_prompts_for_skill_script_execution() -> Result<()> {
|
||||
async fn shell_zsh_fork_skill_scripts_ignore_declared_permissions() -> Result<()> {
|
||||
skip_if_no_network!(Ok(()));
|
||||
|
||||
let Some(runtime) = zsh_fork_runtime("zsh-fork skill prompt test")? else {
|
||||
return Ok(());
|
||||
};
|
||||
|
||||
let server = start_mock_server().await;
|
||||
let tool_call_id = "zsh-fork-skill-call";
|
||||
let test = build_zsh_fork_test(
|
||||
&server,
|
||||
runtime,
|
||||
AskForApproval::OnRequest,
|
||||
SandboxPolicy::new_workspace_write_policy(),
|
||||
|home| {
|
||||
write_skill_with_shell_script(home, "mbolin-test-skill", "hello-mbolin.sh").unwrap();
|
||||
write_skill_metadata(
|
||||
home,
|
||||
"mbolin-test-skill",
|
||||
r#"
|
||||
permissions:
|
||||
file_system:
|
||||
read:
|
||||
- "./data"
|
||||
write:
|
||||
- "./output"
|
||||
"#,
|
||||
)
|
||||
.unwrap();
|
||||
},
|
||||
)
|
||||
.await?;
|
||||
|
||||
let (script_path_str, command) = skill_script_command(&test, "hello-mbolin.sh")?;
|
||||
let arguments = shell_command_arguments(&command)?;
|
||||
let mocks =
|
||||
mount_function_call_agent_response(&server, tool_call_id, &arguments, "shell_command")
|
||||
.await;
|
||||
|
||||
submit_turn_with_policies(
|
||||
&test,
|
||||
"use $mbolin-test-skill",
|
||||
AskForApproval::OnRequest,
|
||||
SandboxPolicy::new_workspace_write_policy(),
|
||||
)
|
||||
.await?;
|
||||
|
||||
let maybe_approval = wait_for_exec_approval_request(&test).await;
|
||||
let approval = match maybe_approval {
|
||||
Some(approval) => approval,
|
||||
None => {
|
||||
let call_output = mocks
|
||||
.completion
|
||||
.single_request()
|
||||
.function_call_output(tool_call_id);
|
||||
panic!(
|
||||
"expected exec approval request before completion; function_call_output={call_output:?}"
|
||||
);
|
||||
}
|
||||
};
|
||||
assert_eq!(approval.call_id, tool_call_id);
|
||||
assert_eq!(approval.command, vec![script_path_str.clone()]);
|
||||
assert_eq!(
|
||||
approval.available_decisions,
|
||||
Some(vec![
|
||||
ReviewDecision::Approved,
|
||||
ReviewDecision::ApprovedForSession,
|
||||
ReviewDecision::Abort,
|
||||
])
|
||||
);
|
||||
assert_eq!(
|
||||
approval.additional_permissions,
|
||||
Some(PermissionProfile {
|
||||
file_system: Some(FileSystemPermissions {
|
||||
read: Some(vec![absolute_path(
|
||||
&test.codex_home_path().join("skills/mbolin-test-skill/data"),
|
||||
)]),
|
||||
write: Some(vec![absolute_path(
|
||||
&test
|
||||
.codex_home_path()
|
||||
.join("skills/mbolin-test-skill/output"),
|
||||
)]),
|
||||
}),
|
||||
..Default::default()
|
||||
})
|
||||
);
|
||||
assert_eq!(
|
||||
approval.skill_metadata,
|
||||
Some(ExecApprovalRequestSkillMetadata {
|
||||
path_to_skills_md: test
|
||||
.codex_home_path()
|
||||
.join("skills/mbolin-test-skill/agents/openai.yaml"),
|
||||
})
|
||||
);
|
||||
|
||||
test.codex
|
||||
.submit(Op::ExecApproval {
|
||||
id: approval.effective_approval_id(),
|
||||
turn_id: None,
|
||||
decision: ReviewDecision::Denied,
|
||||
})
|
||||
.await?;
|
||||
|
||||
wait_for_turn_complete(&test).await;
|
||||
|
||||
let call_output = mocks
|
||||
.completion
|
||||
.single_request()
|
||||
.function_call_output(tool_call_id);
|
||||
let output = call_output["output"].as_str().unwrap_or_default();
|
||||
assert!(
|
||||
output.contains("Execution denied: User denied execution"),
|
||||
"expected rejection marker in function_call_output: {output:?}"
|
||||
);
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// Runs a skill script that declares a write permission under a granular
/// approval policy with every flag enabled, and checks that an exec approval
/// request is raised for exactly that script. The request is then denied and
/// the tool output must carry the user-denial marker.
///
/// NOTE(review): the test name says `sandbox_approval_false`, but the policy
/// below sets `sandbox_approval: true`. Presumably the name refers to an
/// older, inverted "reject" flag; confirm before relying on the name.
#[cfg(unix)]
|
||||
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
|
||||
async fn shell_zsh_fork_skill_script_reject_policy_with_sandbox_approval_false_still_prompts()
|
||||
-> Result<()> {
|
||||
skip_if_no_network!(Ok(()));
|
||||
|
||||
// Return early (test passes trivially) when no zsh-fork runtime is available.
let Some(runtime) = zsh_fork_runtime("zsh-fork reject false skill prompt test")? else {
|
||||
return Ok(());
|
||||
};
|
||||
|
||||
let approval_policy = AskForApproval::Granular(GranularApprovalConfig {
|
||||
sandbox_approval: true,
|
||||
rules: true,
|
||||
skill_approval: true,
|
||||
request_permissions: true,
|
||||
mcp_elicitations: true,
|
||||
});
|
||||
let server = start_mock_server().await;
|
||||
let tool_call_id = "zsh-fork-skill-reject-false";
|
||||
let test = build_zsh_fork_test(
|
||||
&server,
|
||||
runtime,
|
||||
approval_policy,
|
||||
SandboxPolicy::new_workspace_write_policy(),
|
||||
|home| {
|
||||
write_skill_with_shell_script(home, "mbolin-test-skill", "hello-mbolin.sh").unwrap();
|
||||
write_skill_metadata(
|
||||
home,
|
||||
"mbolin-test-skill",
|
||||
r#"
|
||||
permissions:
|
||||
file_system:
|
||||
write:
|
||||
- "./output"
|
||||
"#,
|
||||
)
|
||||
.unwrap();
|
||||
},
|
||||
)
|
||||
.await?;
|
||||
|
||||
let (script_path_str, command) = skill_script_command(&test, "hello-mbolin.sh")?;
|
||||
let arguments = shell_command_arguments(&command)?;
|
||||
let mocks =
|
||||
mount_function_call_agent_response(&server, tool_call_id, &arguments, "shell_command")
|
||||
.await;
|
||||
|
||||
submit_turn_with_policies(
|
||||
&test,
|
||||
"use $mbolin-test-skill",
|
||||
approval_policy,
|
||||
SandboxPolicy::new_workspace_write_policy(),
|
||||
)
|
||||
.await?;
|
||||
|
||||
// An approval request must arrive; otherwise fail with the tool output for diagnosis.
let maybe_approval = wait_for_exec_approval_request(&test).await;
|
||||
let approval = match maybe_approval {
|
||||
Some(approval) => approval,
|
||||
None => {
|
||||
let call_output = mocks
|
||||
.completion
|
||||
.single_request()
|
||||
.function_call_output(tool_call_id);
|
||||
panic!(
|
||||
"expected exec approval request before completion; function_call_output={call_output:?}"
|
||||
);
|
||||
}
|
||||
};
|
||||
assert_eq!(approval.call_id, tool_call_id);
|
||||
assert_eq!(approval.command, vec![script_path_str]);
|
||||
|
||||
// Deny the request and verify the denial is reported back in the tool output.
test.codex
|
||||
.submit(Op::ExecApproval {
|
||||
id: approval.effective_approval_id(),
|
||||
turn_id: None,
|
||||
decision: ReviewDecision::Denied,
|
||||
})
|
||||
.await?;
|
||||
|
||||
wait_for_turn_complete(&test).await;
|
||||
|
||||
let call_output = mocks
|
||||
.completion
|
||||
.single_request()
|
||||
.function_call_output(tool_call_id);
|
||||
let output = call_output["output"].as_str().unwrap_or_default();
|
||||
assert!(
|
||||
output.contains("Execution denied: User denied execution"),
|
||||
"expected rejection marker in function_call_output: {output:?}"
|
||||
);
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// Runs a skill script that declares a write permission under a granular
/// approval policy whose `sandbox_approval` flag is disabled (all other flags
/// enabled), and checks that an exec approval request is still raised. The
/// request is then denied and the tool output must carry the user-denial
/// marker.
///
/// NOTE(review): the test name says `sandbox_approval_true`, but the policy
/// below sets `sandbox_approval: false`. Presumably the name refers to an
/// older, inverted "reject" flag; confirm before relying on the name.
#[cfg(unix)]
|
||||
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
|
||||
async fn shell_zsh_fork_skill_script_reject_policy_with_sandbox_approval_true_still_prompts()
|
||||
-> Result<()> {
|
||||
skip_if_no_network!(Ok(()));
|
||||
|
||||
// Return early (test passes trivially) when no zsh-fork runtime is available.
let Some(runtime) =
|
||||
zsh_fork_runtime("zsh-fork reject sandbox approval true skill prompt test")?
|
||||
else {
|
||||
return Ok(());
|
||||
};
|
||||
|
||||
let approval_policy = AskForApproval::Granular(GranularApprovalConfig {
|
||||
sandbox_approval: false,
|
||||
rules: true,
|
||||
skill_approval: true,
|
||||
request_permissions: true,
|
||||
mcp_elicitations: true,
|
||||
});
|
||||
let server = start_mock_server().await;
|
||||
let tool_call_id = "zsh-fork-skill-reject-true";
|
||||
let test = build_zsh_fork_test(
|
||||
&server,
|
||||
runtime,
|
||||
approval_policy,
|
||||
SandboxPolicy::new_workspace_write_policy(),
|
||||
|home| {
|
||||
write_skill_with_shell_script(home, "mbolin-test-skill", "hello-mbolin.sh").unwrap();
|
||||
write_skill_metadata(
|
||||
home,
|
||||
"mbolin-test-skill",
|
||||
r#"
|
||||
permissions:
|
||||
file_system:
|
||||
write:
|
||||
- "./output"
|
||||
"#,
|
||||
)
|
||||
.unwrap();
|
||||
},
|
||||
)
|
||||
.await?;
|
||||
|
||||
let (_, command) = skill_script_command(&test, "hello-mbolin.sh")?;
|
||||
let arguments = shell_command_arguments(&command)?;
|
||||
let mocks =
|
||||
mount_function_call_agent_response(&server, tool_call_id, &arguments, "shell_command")
|
||||
.await;
|
||||
|
||||
submit_turn_with_policies(
|
||||
&test,
|
||||
"use $mbolin-test-skill",
|
||||
approval_policy,
|
||||
SandboxPolicy::new_workspace_write_policy(),
|
||||
)
|
||||
.await?;
|
||||
|
||||
// An approval request must arrive; otherwise fail with the tool output for diagnosis.
let maybe_approval = wait_for_exec_approval_request(&test).await;
|
||||
let approval = match maybe_approval {
|
||||
Some(approval) => approval,
|
||||
None => {
|
||||
let call_output = mocks
|
||||
.completion
|
||||
.single_request()
|
||||
.function_call_output(tool_call_id);
|
||||
panic!(
|
||||
"expected exec approval request before completion; function_call_output={call_output:?}"
|
||||
);
|
||||
}
|
||||
};
|
||||
assert_eq!(approval.call_id, tool_call_id);
|
||||
|
||||
// Deny the request and verify the denial is reported back in the tool output.
test.codex
|
||||
.submit(Op::ExecApproval {
|
||||
id: approval.effective_approval_id(),
|
||||
turn_id: None,
|
||||
decision: ReviewDecision::Denied,
|
||||
})
|
||||
.await?;
|
||||
|
||||
wait_for_turn_complete(&test).await;
|
||||
|
||||
let call_output = mocks
|
||||
.completion
|
||||
.single_request()
|
||||
.function_call_output(tool_call_id);
|
||||
let output = call_output["output"].as_str().unwrap_or_default();
|
||||
assert!(
|
||||
output.contains("Execution denied: User denied execution"),
|
||||
"expected rejection marker in function_call_output: {output:?}"
|
||||
);
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
#[cfg(unix)]
|
||||
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
|
||||
async fn shell_zsh_fork_skill_script_reject_policy_with_skill_approval_true_skips_prompt()
|
||||
-> Result<()> {
|
||||
skip_if_no_network!(Ok(()));
|
||||
|
||||
let Some(runtime) = zsh_fork_runtime("zsh-fork reject skill approval true skill prompt test")?
|
||||
else {
|
||||
let Some(runtime) = zsh_fork_runtime("zsh-fork skill script ignores permissions test")? else {
|
||||
return Ok(());
|
||||
};
|
||||
|
||||
@@ -483,364 +142,30 @@ async fn shell_zsh_fork_skill_script_reject_policy_with_skill_approval_true_skip
|
||||
request_permissions: true,
|
||||
mcp_elicitations: true,
|
||||
});
|
||||
let server = start_mock_server().await;
|
||||
let tool_call_id = "zsh-fork-skill-reject-skill-approval-true";
|
||||
let test = build_zsh_fork_test(
|
||||
&server,
|
||||
runtime,
|
||||
approval_policy,
|
||||
SandboxPolicy::new_workspace_write_policy(),
|
||||
|home| {
|
||||
write_skill_with_shell_script(home, "mbolin-test-skill", "hello-mbolin.sh").unwrap();
|
||||
write_skill_metadata(
|
||||
home,
|
||||
"mbolin-test-skill",
|
||||
r#"
|
||||
permissions:
|
||||
file_system:
|
||||
write:
|
||||
- "./output"
|
||||
"#,
|
||||
)
|
||||
.unwrap();
|
||||
},
|
||||
)
|
||||
.await?;
|
||||
|
||||
let (_, command) = skill_script_command(&test, "hello-mbolin.sh")?;
|
||||
let arguments = shell_command_arguments(&command)?;
|
||||
let mocks =
|
||||
mount_function_call_agent_response(&server, tool_call_id, &arguments, "shell_command")
|
||||
.await;
|
||||
|
||||
submit_turn_with_policies(
|
||||
&test,
|
||||
"use $mbolin-test-skill",
|
||||
approval_policy,
|
||||
SandboxPolicy::new_workspace_write_policy(),
|
||||
)
|
||||
.await?;
|
||||
|
||||
let approval = wait_for_exec_approval_request(&test).await;
|
||||
assert!(
|
||||
approval.is_none(),
|
||||
"expected reject skill approval policy to skip exec approval"
|
||||
);
|
||||
|
||||
wait_for_turn_complete(&test).await;
|
||||
|
||||
let call_output = mocks
|
||||
.completion
|
||||
.single_request()
|
||||
.function_call_output(tool_call_id);
|
||||
let output = call_output["output"].as_str().unwrap_or_default();
|
||||
assert!(
|
||||
output.contains("Execution denied: Execution forbidden by policy"),
|
||||
"expected policy rejection marker in function_call_output: {output:?}"
|
||||
);
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// Permissionless skills should inherit the turn sandbox without prompting.
|
||||
#[cfg(unix)]
|
||||
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
|
||||
async fn shell_zsh_fork_skill_without_permissions_inherits_turn_sandbox() -> Result<()> {
|
||||
skip_if_no_network!(Ok(()));
|
||||
|
||||
let Some(runtime) = zsh_fork_runtime("zsh-fork inherited skill sandbox test")? else {
|
||||
return Ok(());
|
||||
};
|
||||
|
||||
let outside_dir = tempfile::tempdir_in(std::env::current_dir()?)?;
|
||||
let outside_path = outside_dir
|
||||
.path()
|
||||
.join("zsh-fork-skill-inherited-sandbox.txt");
|
||||
let outside_path_quoted = shlex::try_join([outside_path.to_string_lossy().as_ref()])?;
|
||||
let script_contents = format!(
|
||||
"#!/bin/sh\nprintf '%s' forbidden > {outside_path_quoted}\ncat {outside_path_quoted}\n"
|
||||
);
|
||||
let outside_path_for_hook = outside_path.clone();
|
||||
let script_contents_for_hook = script_contents.clone();
|
||||
let workspace_write_policy = restrictive_workspace_write_policy();
|
||||
|
||||
let server = start_mock_server().await;
|
||||
let test = build_zsh_fork_test(
|
||||
&server,
|
||||
runtime,
|
||||
AskForApproval::OnRequest,
|
||||
workspace_write_policy.clone(),
|
||||
move |home| {
|
||||
let _ = fs::remove_file(&outside_path_for_hook);
|
||||
write_skill_with_shell_script_contents(
|
||||
home,
|
||||
"mbolin-test-skill",
|
||||
"sandboxed.sh",
|
||||
&script_contents_for_hook,
|
||||
)
|
||||
.unwrap();
|
||||
},
|
||||
)
|
||||
.await?;
|
||||
|
||||
let (_, command) = skill_script_command(&test, "sandboxed.sh")?;
|
||||
|
||||
let first_call_id = "zsh-fork-skill-permissions-1";
|
||||
let first_arguments = shell_command_arguments(&command)?;
|
||||
let first_mocks = mount_function_call_agent_response(
|
||||
&server,
|
||||
first_call_id,
|
||||
&first_arguments,
|
||||
"shell_command",
|
||||
)
|
||||
.await;
|
||||
|
||||
submit_turn_with_policies(
|
||||
&test,
|
||||
"use $mbolin-test-skill",
|
||||
AskForApproval::OnRequest,
|
||||
workspace_write_policy.clone(),
|
||||
)
|
||||
.await?;
|
||||
|
||||
let first_approval = wait_for_exec_approval_request(&test).await;
|
||||
assert!(
|
||||
first_approval.is_none(),
|
||||
"expected permissionless skill script to skip exec approval"
|
||||
);
|
||||
|
||||
wait_for_turn_complete(&test).await;
|
||||
|
||||
let first_output = first_mocks
|
||||
.completion
|
||||
.single_request()
|
||||
.function_call_output(first_call_id)["output"]
|
||||
.as_str()
|
||||
.unwrap_or_default()
|
||||
.to_string();
|
||||
assert!(
|
||||
output_shows_sandbox_denial(&first_output) || !first_output.contains("forbidden"),
|
||||
"expected inherited turn sandbox denial on first run, got output: {first_output:?}"
|
||||
);
|
||||
assert!(
|
||||
!outside_path.exists(),
|
||||
"first run should not write outside the turn sandbox"
|
||||
);
|
||||
|
||||
let second_call_id = "zsh-fork-skill-permissions-2";
|
||||
let second_arguments = shell_command_arguments(&command)?;
|
||||
let second_mocks = mount_function_call_agent_response(
|
||||
&server,
|
||||
second_call_id,
|
||||
&second_arguments,
|
||||
"shell_command",
|
||||
)
|
||||
.await;
|
||||
|
||||
submit_turn_with_policies(
|
||||
&test,
|
||||
"use $mbolin-test-skill",
|
||||
AskForApproval::OnRequest,
|
||||
workspace_write_policy,
|
||||
)
|
||||
.await?;
|
||||
|
||||
let cached_approval = wait_for_exec_approval_request(&test).await;
|
||||
assert!(
|
||||
cached_approval.is_none(),
|
||||
"expected permissionless skill rerun to continue skipping exec approval"
|
||||
);
|
||||
|
||||
let second_output = second_mocks
|
||||
.completion
|
||||
.single_request()
|
||||
.function_call_output(second_call_id)["output"]
|
||||
.as_str()
|
||||
.unwrap_or_default()
|
||||
.to_string();
|
||||
assert!(
|
||||
output_shows_sandbox_denial(&second_output) || !second_output.contains("forbidden"),
|
||||
"expected cached skill approval to retain inherited turn sandboxing, got output: {second_output:?}"
|
||||
);
|
||||
assert!(
|
||||
!outside_path.exists(),
|
||||
"cached session approval should not widen a permissionless skill to full access"
|
||||
);
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// Empty skill permissions should behave like no skill override and inherit the
|
||||
/// turn sandbox without prompting.
|
||||
#[cfg(unix)]
|
||||
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
|
||||
async fn shell_zsh_fork_skill_with_empty_permissions_inherits_turn_sandbox() -> Result<()> {
|
||||
skip_if_no_network!(Ok(()));
|
||||
|
||||
let Some(runtime) = zsh_fork_runtime("zsh-fork empty skill permissions test")? else {
|
||||
return Ok(());
|
||||
};
|
||||
|
||||
let outside_dir = tempfile::tempdir_in(std::env::current_dir()?)?;
|
||||
let outside_path = outside_dir
|
||||
.path()
|
||||
.join("zsh-fork-skill-empty-permissions.txt");
|
||||
let outside_path_quoted = shlex::try_join([outside_path.to_string_lossy().as_ref()])?;
|
||||
let script_contents = format!(
|
||||
"#!/bin/sh\nprintf '%s' allowed > {outside_path_quoted}\ncat {outside_path_quoted}\n"
|
||||
);
|
||||
let outside_path_for_hook = outside_path.clone();
|
||||
let script_contents_for_hook = script_contents.clone();
|
||||
|
||||
let server = start_mock_server().await;
|
||||
let test = build_zsh_fork_test(
|
||||
&server,
|
||||
runtime,
|
||||
AskForApproval::OnRequest,
|
||||
SandboxPolicy::DangerFullAccess,
|
||||
move |home| {
|
||||
let _ = fs::remove_file(&outside_path_for_hook);
|
||||
write_skill_with_shell_script_contents(
|
||||
home,
|
||||
"mbolin-test-skill",
|
||||
"sandboxed.sh",
|
||||
&script_contents_for_hook,
|
||||
)
|
||||
.unwrap();
|
||||
write_skill_metadata(home, "mbolin-test-skill", "permissions: {}\n").unwrap();
|
||||
},
|
||||
)
|
||||
.await?;
|
||||
|
||||
let (_, command) = skill_script_command(&test, "sandboxed.sh")?;
|
||||
|
||||
let first_call_id = "zsh-fork-skill-empty-permissions-1";
|
||||
let first_arguments = shell_command_arguments(&command)?;
|
||||
let first_mocks = mount_function_call_agent_response(
|
||||
&server,
|
||||
first_call_id,
|
||||
&first_arguments,
|
||||
"shell_command",
|
||||
)
|
||||
.await;
|
||||
|
||||
submit_turn_with_policies(
|
||||
&test,
|
||||
"use $mbolin-test-skill",
|
||||
AskForApproval::OnRequest,
|
||||
SandboxPolicy::DangerFullAccess,
|
||||
)
|
||||
.await?;
|
||||
|
||||
let first_approval = wait_for_exec_approval_request(&test).await;
|
||||
assert!(
|
||||
first_approval.is_none(),
|
||||
"expected empty skill permissions to skip exec approval"
|
||||
);
|
||||
|
||||
wait_for_turn_complete(&test).await;
|
||||
|
||||
let first_output = first_mocks
|
||||
.completion
|
||||
.single_request()
|
||||
.function_call_output(first_call_id)["output"]
|
||||
.as_str()
|
||||
.unwrap_or_default()
|
||||
.to_string();
|
||||
assert!(
|
||||
first_output.contains("allowed"),
|
||||
"expected empty skill permissions to inherit full-access turn sandbox, got output: {first_output:?}"
|
||||
);
|
||||
assert_eq!(fs::read_to_string(&outside_path)?, "allowed");
|
||||
|
||||
let second_call_id = "zsh-fork-skill-empty-permissions-2";
|
||||
let second_arguments = shell_command_arguments(&command)?;
|
||||
let second_mocks = mount_function_call_agent_response(
|
||||
&server,
|
||||
second_call_id,
|
||||
&second_arguments,
|
||||
"shell_command",
|
||||
)
|
||||
.await;
|
||||
|
||||
let _ = fs::remove_file(&outside_path);
|
||||
|
||||
submit_turn_with_policies(
|
||||
&test,
|
||||
"use $mbolin-test-skill",
|
||||
AskForApproval::OnRequest,
|
||||
SandboxPolicy::DangerFullAccess,
|
||||
)
|
||||
.await?;
|
||||
|
||||
let cached_approval = wait_for_exec_approval_request(&test).await;
|
||||
assert!(
|
||||
cached_approval.is_none(),
|
||||
"expected empty-permissions skill rerun to continue skipping exec approval"
|
||||
);
|
||||
|
||||
let second_output = second_mocks
|
||||
.completion
|
||||
.single_request()
|
||||
.function_call_output(second_call_id)["output"]
|
||||
.as_str()
|
||||
.unwrap_or_default()
|
||||
.to_string();
|
||||
assert!(
|
||||
second_output.contains("allowed"),
|
||||
"expected cached empty-permissions skill approval to inherit the turn sandbox, got output: {second_output:?}"
|
||||
);
|
||||
assert_eq!(fs::read_to_string(&outside_path)?, "allowed");
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// The validation to focus on is: writes to the skill-approved folder succeed,
|
||||
/// and writes to an unrelated folder fail, both before and after cached approval.
|
||||
#[cfg(unix)]
|
||||
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
|
||||
async fn shell_zsh_fork_skill_session_approval_enforces_skill_permissions() -> Result<()> {
|
||||
skip_if_no_network!(Ok(()));
|
||||
|
||||
let Some(runtime) = zsh_fork_runtime("zsh-fork explicit skill sandbox test")? else {
|
||||
return Ok(());
|
||||
};
|
||||
|
||||
let outside_dir = tempfile::tempdir_in(std::env::current_dir()?)?;
|
||||
let allowed_dir = outside_dir.path().join("allowed-output");
|
||||
let blocked_dir = outside_dir.path().join("blocked-output");
|
||||
fs::create_dir_all(&allowed_dir)?;
|
||||
fs::create_dir_all(&blocked_dir)?;
|
||||
|
||||
let allowed_path = allowed_dir.join("allowed.txt");
|
||||
let blocked_path = blocked_dir.join("blocked.txt");
|
||||
let allowed_path_quoted = shlex::try_join([allowed_path.to_string_lossy().as_ref()])?;
|
||||
let blocked_path_quoted = shlex::try_join([blocked_path.to_string_lossy().as_ref()])?;
|
||||
let script_contents = format!(
|
||||
"#!/bin/sh\nprintf '%s' allowed > {allowed_path_quoted}\ncat {allowed_path_quoted}\nprintf '%s' forbidden > {blocked_path_quoted}\nif [ -f {blocked_path_quoted} ]; then echo blocked-created; fi\n"
|
||||
"#!/bin/sh\nprintf '%s' allowed > {allowed_path_quoted}\nif [ -f {allowed_path_quoted} ]; then cat {allowed_path_quoted}; fi\n"
|
||||
);
|
||||
let allowed_dir_for_hook = allowed_dir.clone();
|
||||
let allowed_path_for_hook = allowed_path.clone();
|
||||
let blocked_path_for_hook = blocked_path.clone();
|
||||
let script_contents_for_hook = script_contents.clone();
|
||||
|
||||
let permissions_yaml = format!(
|
||||
"permissions:\n file_system:\n write:\n - \"{}\"\n",
|
||||
allowed_dir.display()
|
||||
);
|
||||
|
||||
let workspace_write_policy = restrictive_workspace_write_policy();
|
||||
let server = start_mock_server().await;
|
||||
let allowed_path_for_hook = allowed_path.clone();
|
||||
let script_contents_for_hook = script_contents.clone();
|
||||
let test = build_zsh_fork_test(
|
||||
&server,
|
||||
runtime,
|
||||
AskForApproval::OnRequest,
|
||||
approval_policy,
|
||||
workspace_write_policy.clone(),
|
||||
move |home| {
|
||||
let _ = fs::remove_file(&allowed_path_for_hook);
|
||||
let _ = fs::remove_file(&blocked_path_for_hook);
|
||||
fs::create_dir_all(&allowed_dir_for_hook).unwrap();
|
||||
fs::create_dir_all(blocked_path_for_hook.parent().unwrap()).unwrap();
|
||||
write_skill_with_shell_script_contents(
|
||||
home,
|
||||
"mbolin-test-skill",
|
||||
@@ -853,128 +178,49 @@ async fn shell_zsh_fork_skill_session_approval_enforces_skill_permissions() -> R
|
||||
)
|
||||
.await?;
|
||||
|
||||
let (script_path_str, command) = skill_script_command(&test, "sandboxed.sh")?;
|
||||
|
||||
let first_call_id = "zsh-fork-skill-permissions-1";
|
||||
let first_arguments = shell_command_arguments(&command)?;
|
||||
let first_mocks = mount_function_call_agent_response(
|
||||
&server,
|
||||
first_call_id,
|
||||
&first_arguments,
|
||||
"shell_command",
|
||||
)
|
||||
.await;
|
||||
let command = skill_script_command(&test, "sandboxed.sh")?;
|
||||
let call_id = "zsh-fork-skill-script-ignores-permissions";
|
||||
let arguments = shell_command_arguments(&command)?;
|
||||
let mocks =
|
||||
mount_function_call_agent_response(&server, call_id, &arguments, "shell_command").await;
|
||||
|
||||
submit_turn_with_policies(
|
||||
&test,
|
||||
"use $mbolin-test-skill",
|
||||
AskForApproval::OnRequest,
|
||||
workspace_write_policy.clone(),
|
||||
)
|
||||
.await?;
|
||||
|
||||
let maybe_approval = wait_for_exec_approval_request(&test).await;
|
||||
let approval = match maybe_approval {
|
||||
Some(approval) => approval,
|
||||
None => panic!("expected exec approval request before completion"),
|
||||
};
|
||||
assert_eq!(approval.call_id, first_call_id);
|
||||
assert_eq!(approval.command, vec![script_path_str.clone()]);
|
||||
assert_eq!(
|
||||
approval.additional_permissions,
|
||||
Some(PermissionProfile {
|
||||
file_system: Some(FileSystemPermissions {
|
||||
read: None,
|
||||
write: Some(vec![absolute_path(&allowed_dir)]),
|
||||
}),
|
||||
..Default::default()
|
||||
})
|
||||
);
|
||||
|
||||
test.codex
|
||||
.submit(Op::ExecApproval {
|
||||
id: approval.effective_approval_id(),
|
||||
turn_id: None,
|
||||
decision: ReviewDecision::ApprovedForSession,
|
||||
})
|
||||
.await?;
|
||||
|
||||
wait_for_turn_complete(&test).await;
|
||||
|
||||
let first_output = first_mocks
|
||||
.completion
|
||||
.single_request()
|
||||
.function_call_output(first_call_id)["output"]
|
||||
.as_str()
|
||||
.unwrap_or_default()
|
||||
.to_string();
|
||||
assert!(
|
||||
first_output.contains("allowed"),
|
||||
"expected skill sandbox to permit writes to the approved folder, got output: {first_output:?}"
|
||||
);
|
||||
assert_eq!(fs::read_to_string(&allowed_path)?, "allowed");
|
||||
assert!(
|
||||
!blocked_path.exists(),
|
||||
"first run should not write outside the explicit skill sandbox"
|
||||
);
|
||||
assert!(
|
||||
!first_output.contains("blocked-created"),
|
||||
"blocked path should not have been created: {first_output:?}"
|
||||
);
|
||||
|
||||
let second_call_id = "zsh-fork-skill-permissions-2";
|
||||
let second_arguments = shell_command_arguments(&command)?;
|
||||
let second_mocks = mount_function_call_agent_response(
|
||||
&server,
|
||||
second_call_id,
|
||||
&second_arguments,
|
||||
"shell_command",
|
||||
)
|
||||
.await;
|
||||
|
||||
let _ = fs::remove_file(&allowed_path);
|
||||
let _ = fs::remove_file(&blocked_path);
|
||||
|
||||
submit_turn_with_policies(
|
||||
&test,
|
||||
"use $mbolin-test-skill",
|
||||
AskForApproval::OnRequest,
|
||||
approval_policy,
|
||||
workspace_write_policy,
|
||||
)
|
||||
.await?;
|
||||
|
||||
let cached_approval = wait_for_exec_approval_request(&test).await;
|
||||
let approval = wait_for_exec_approval_request(&test).await;
|
||||
assert!(
|
||||
cached_approval.is_none(),
|
||||
"expected second run to reuse the cached session approval"
|
||||
approval.is_none(),
|
||||
"expected skill script execution to skip the removed skill approval path"
|
||||
);
|
||||
|
||||
let second_output = second_mocks
|
||||
wait_for_turn_complete(&test).await;
|
||||
|
||||
let call_output = mocks
|
||||
.completion
|
||||
.single_request()
|
||||
.function_call_output(second_call_id)["output"]
|
||||
.as_str()
|
||||
.unwrap_or_default()
|
||||
.to_string();
|
||||
.function_call_output(call_id);
|
||||
let output = call_output["output"].as_str().unwrap_or_default();
|
||||
assert!(
|
||||
second_output.contains("allowed"),
|
||||
"expected cached skill approval to retain the explicit skill sandbox, got output: {second_output:?}"
|
||||
);
|
||||
assert_eq!(fs::read_to_string(&allowed_path)?, "allowed");
|
||||
assert!(
|
||||
!blocked_path.exists(),
|
||||
"cached session approval should not widen skill execution beyond the explicit skill sandbox"
|
||||
!output.contains("Execution denied: Execution forbidden by policy"),
|
||||
"skill script should now be governed by the turn sandbox, not the removed skill approval gate: {output:?}"
|
||||
);
|
||||
assert!(
|
||||
!second_output.contains("blocked-created"),
|
||||
"blocked path should not have been created after cached approval: {second_output:?}"
|
||||
output_shows_sandbox_denial(output) || !output.contains("allowed"),
|
||||
"expected the turn sandbox to block the out-of-workspace write, got output: {output:?}"
|
||||
);
|
||||
assert!(
|
||||
!allowed_path.exists(),
|
||||
"declared skill permissions should not widen script execution beyond the turn sandbox"
|
||||
);
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// This stays narrow on purpose: the important check is that `WorkspaceWrite`
|
||||
/// continues to deny writes outside the workspace even under `zsh-fork`.
|
||||
#[cfg(unix)]
|
||||
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
|
||||
async fn shell_zsh_fork_still_enforces_workspace_write_sandbox() -> Result<()> {
|
||||
|
||||
Reference in New Issue
Block a user