guardian: address initial feedback and tweaks (#13897)

## Summary
- remove the remaining model-visible guardian-specific `on-request`
prompt additions so enabling the feature does not change the main
approval-policy instructions
- neutralize user-facing guardian wording so it refers to automatic
approval review / approval requests, rather than implying a second
reviewer or covering only sandbox escalations
- tighten guardian retry-context handling so agent-authored
`justification` stays in the structured action JSON and is not also
injected as raw retry context
- simplify guardian review plumbing in core by deleting dead
prompt-append paths and trimming some request/transcript setup code

## Notable Changes
- delete the dead `permissions/approval_policy/guardian.md` append path
and stop threading `guardian_approval_enabled` through model-facing
developer-instruction builders
- rename the experimental feature copy to `Automatic approval review`
and update the `/experimental` snapshot text accordingly
- make approval-review status strings generic across shell, patch,
network, and MCP review types
- forward real sandbox/network retry reasons for shell and unified-exec
guardian review, but do not pass agent-authored justification as raw
retry context
- simplify `guardian.rs` by removing the one-field request wrapper,
deduping reasoning-effort selection, and cleaning up transcript entry
collection

## Testing
- `just fmt`
- full validation left to CI

---------

Co-authored-by: Codex <noreply@openai.com>
This commit is contained in:
Charley Cunningham
2026-03-09 09:25:24 -07:00
committed by GitHub
parent 2bc3e52a91
commit f23fcd6ced
16 changed files with 421 additions and 352 deletions

View File

@@ -408,8 +408,6 @@ const APPROVAL_POLICY_ON_REQUEST_RULE: &str =
include_str!("prompts/permissions/approval_policy/on_request_rule.md");
const APPROVAL_POLICY_ON_REQUEST_RULE_REQUEST_PERMISSION: &str =
include_str!("prompts/permissions/approval_policy/on_request_rule_request_permission.md");
const GUARDIAN_APPROVAL_FEATURE: &str =
include_str!("prompts/permissions/approval_policy/guardian.md");
const SANDBOX_MODE_DANGER_FULL_ACCESS: &str =
include_str!("prompts/permissions/sandbox_mode/danger_full_access.md");
@@ -427,7 +425,6 @@ impl DeveloperInstructions {
pub fn from(
approval_policy: AskForApproval,
guardian_approval_enabled: bool,
exec_policy: &Policy,
request_permission_enabled: bool,
) -> DeveloperInstructions {
@@ -451,14 +448,7 @@ impl DeveloperInstructions {
AskForApproval::Never => APPROVAL_POLICY_NEVER.to_string(),
AskForApproval::UnlessTrusted => APPROVAL_POLICY_UNLESS_TRUSTED.to_string(),
AskForApproval::OnFailure => APPROVAL_POLICY_ON_FAILURE.to_string(),
AskForApproval::OnRequest => {
let mut instructions = on_request_instructions();
if guardian_approval_enabled {
instructions.push_str("\n\n");
instructions.push_str(GUARDIAN_APPROVAL_FEATURE);
}
instructions
}
AskForApproval::OnRequest => on_request_instructions(),
AskForApproval::Reject(reject_config) => {
let on_request_instructions = on_request_instructions();
let sandbox_approval = reject_config.sandbox_approval;
@@ -521,7 +511,6 @@ impl DeveloperInstructions {
pub fn from_policy(
sandbox_policy: &SandboxPolicy,
approval_policy: AskForApproval,
guardian_approval_enabled: bool,
exec_policy: &Policy,
cwd: &Path,
request_permission_enabled: bool,
@@ -546,7 +535,6 @@ impl DeveloperInstructions {
sandbox_mode,
network_access,
approval_policy,
guardian_approval_enabled,
exec_policy,
writable_roots,
request_permission_enabled,
@@ -571,7 +559,6 @@ impl DeveloperInstructions {
sandbox_mode: SandboxMode,
network_access: NetworkAccess,
approval_policy: AskForApproval,
guardian_approval_enabled: bool,
exec_policy: &Policy,
writable_roots: Option<Vec<WritableRoot>>,
request_permission_enabled: bool,
@@ -585,7 +572,6 @@ impl DeveloperInstructions {
))
.concat(DeveloperInstructions::from(
approval_policy,
guardian_approval_enabled,
exec_policy,
request_permission_enabled,
))
@@ -1667,7 +1653,6 @@ mod tests {
SandboxMode::WorkspaceWrite,
NetworkAccess::Enabled,
AskForApproval::OnRequest,
false,
&Policy::empty(),
None,
false,
@@ -1697,7 +1682,6 @@ mod tests {
let instructions = DeveloperInstructions::from_policy(
&policy,
AskForApproval::UnlessTrusted,
false,
&Policy::empty(),
&PathBuf::from("/tmp"),
false,
@@ -1720,7 +1704,6 @@ mod tests {
SandboxMode::WorkspaceWrite,
NetworkAccess::Enabled,
AskForApproval::OnRequest,
false,
&exec_policy,
None,
false,
@@ -1738,7 +1721,6 @@ mod tests {
SandboxMode::WorkspaceWrite,
NetworkAccess::Enabled,
AskForApproval::OnRequest,
false,
&Policy::empty(),
None,
true,
@@ -1749,23 +1731,6 @@ mod tests {
assert!(text.contains("additional_permissions"));
}
#[test]
fn includes_guardian_feature_guidance_for_on_request_when_enabled() {
let instructions = DeveloperInstructions::from_permissions_with_network(
SandboxMode::WorkspaceWrite,
NetworkAccess::Enabled,
AskForApproval::OnRequest,
true,
&Policy::empty(),
None,
false,
);
let text = instructions.into_text();
assert!(text.contains("guardian subagent"));
assert!(text.contains("approval prompts"));
}
#[test]
fn render_command_prefix_list_sorts_by_len_then_total_len_then_alphabetical() {
let prefixes = vec![